diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cdd5404fcf..fb0154b969 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -546,3 +546,233 @@ jobs: - name: Test run: ../../../b2 toolset=$TOOLSET ${{ matrix.suite }} define=CI_SUPPRESS_KNOWN_ISSUES define=SLOW_COMPILER define=BOOST_MATH_STANDALONE define=BOOST_MP_STANDALONE working-directory: ../boost-root/libs/math/test + + posix-cmake-test: + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-22.04 + + runs-on: ${{matrix.os}} + + steps: + - uses: actions/checkout@v4 + + - name: Install packages + if: matrix.install + run: sudo apt install ${{matrix.install}} libgmp-dev libmpfr-dev libfftw3-dev + + - name: Setup Boost + run: | + echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY + LIBRARY=${GITHUB_REPOSITORY#*/} + echo LIBRARY: $LIBRARY + echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV + echo GITHUB_BASE_REF: $GITHUB_BASE_REF + echo GITHUB_REF: $GITHUB_REF + REF=${GITHUB_BASE_REF:-$GITHUB_REF} + REF=${REF#refs/heads/} + echo REF: $REF + BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true + echo BOOST_BRANCH: $BOOST_BRANCH + cd .. + git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root + cd boost-root + mkdir -p libs/$LIBRARY + cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY + git submodule update --init tools/boostdep + python tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY + + - name: Configure + run: | + cd ../boost-root + mkdir __build__ && cd __build__ + cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON .. + + - name: Build tests + run: | + cd ../boost-root/__build__ + cmake --build . --target tests + + sycl-cmake-test: + strategy: + fail-fast: false + + runs-on: ubuntu-latest + + steps: + - name: Intel Apt repository + timeout-minutes: 1 + run: | + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list + sudo apt-get update + + - name: Install Intel oneAPI compilers + timeout-minutes: 5 + run: sudo apt-get install intel-oneapi-compiler-fortran intel-oneapi-compiler-dpcpp-cpp + + - name: Setup Intel oneAPI environment + run: | + source /opt/intel/oneapi/setvars.sh + printenv >> $GITHUB_ENV + + - name: checkout project code + uses: actions/checkout@v4 + + - name: Install Packages + run: | + sudo apt-get install -y cmake make + + - name: Setup Boost + run: | + echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY + LIBRARY=${GITHUB_REPOSITORY#*/} + echo LIBRARY: $LIBRARY + echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV + echo GITHUB_BASE_REF: $GITHUB_BASE_REF + echo GITHUB_REF: $GITHUB_REF + REF=${GITHUB_BASE_REF:-$GITHUB_REF} + REF=${REF#refs/heads/} + echo REF: $REF + BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true + echo BOOST_BRANCH: $BOOST_BRANCH + cd .. + git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root + cd boost-root + mkdir -p libs/$LIBRARY + cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY + git submodule update --init tools/boostdep + python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY + - name: Configure + run: | + cd ../boost-root + mkdir __build__ && cd __build__ + cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DBOOST_MATH_ENABLE_SYCL=ON .. 
+ - name: Build tests
+ run: |
+ cd ../boost-root/__build__
+ cmake --build . --target tests -j $(nproc)
+ - name: Run tests
+ run: |
+ cd ../boost-root/__build__
+ ctest --output-on-failure --no-tests=error
+ cuda-cmake-test:
+ strategy:
+ fail-fast: false
+
+ runs-on: ubuntu-22.04
+
+ steps:
+ - uses: Jimver/cuda-toolkit@v0.2.16
+ id: cuda-toolkit
+ with:
+ cuda: '12.5.0'
+ method: 'network'
+ sub-packages: '["nvcc"]'
+
+ - name: Output CUDA information
+ run: |
+ echo "Installed cuda version is: ${{steps.cuda-toolkit.outputs.cuda}}"
+ echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
+ nvcc -V
+ - uses: actions/checkout@v4
+
+ - name: Install Packages
+ run: |
+ sudo apt-get install -y cmake make
+ - name: Setup Boost
+ run: |
+ echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY
+ LIBRARY=${GITHUB_REPOSITORY#*/}
+ echo LIBRARY: $LIBRARY
+ echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV
+ echo GITHUB_BASE_REF: $GITHUB_BASE_REF
+ echo GITHUB_REF: $GITHUB_REF
+ REF=${GITHUB_BASE_REF:-$GITHUB_REF}
+ REF=${REF#refs/heads/}
+ echo REF: $REF
+ BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true
+ echo BOOST_BRANCH: $BOOST_BRANCH
+ cd ..
+ git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root
+ cd boost-root
+ mkdir -p libs/$LIBRARY
+ cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY
+ git submodule update --init tools/boostdep
+ python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY
+ - name: Configure
+ run: |
+ cd ../boost-root
+ mkdir __build__ && cd __build__
+ cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=70 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.5 ..
+ - name: Build tests
+ run: |
+ cd ../boost-root/__build__
+ cmake --build . --target tests -j $(nproc)
+ # Will leave this commented out for now. GHA does not install graphics cards by default
+ #- name: Run tests
+ # run: |
+ # cd ../boost-root/__build__
+ # ctest --output-on-failure --no-tests=error
+ nvrtc-cmake-test:
+ strategy:
+ fail-fast: false
+
+ runs-on: ubuntu-22.04
+
+ steps:
+ - uses: Jimver/cuda-toolkit@v0.2.16
+ id: cuda-toolkit
+ with:
+ cuda: '12.5.0'
+ method: 'network'
+
+ - name: Output CUDA information
+ run: |
+ echo "Installed cuda version is: ${{steps.cuda-toolkit.outputs.cuda}}"
+ echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
+ nvcc -V
+ - uses: actions/checkout@v4
+
+ - name: Install Packages
+ run: |
+ sudo apt-get install -y cmake make
+ - name: Setup Boost
+ run: |
+ echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY
+ LIBRARY=${GITHUB_REPOSITORY#*/}
+ echo LIBRARY: $LIBRARY
+ echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV
+ echo GITHUB_BASE_REF: $GITHUB_BASE_REF
+ echo GITHUB_REF: $GITHUB_REF
+ REF=${GITHUB_BASE_REF:-$GITHUB_REF}
+ REF=${REF#refs/heads/}
+ echo REF: $REF
+ BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true
+ echo BOOST_BRANCH: $BOOST_BRANCH
+ cd ..
+ git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root
+ cd boost-root
+ mkdir -p libs/$LIBRARY
+ cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY
+ git submodule update --init tools/boostdep
+ python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY
+ - name: Configure
+ run: |
+ cd ../boost-root
+ mkdir __build__ && cd __build__
+ cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_NVRTC=1 -DCMAKE_CUDA_ARCHITECTURES=70 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.5 -DBOOST_MATH_NVRTC_CI_RUN=1 ..
+ pwd
+ - name: Build tests
+ run: |
+ cd ../boost-root/__build__
+ cmake --build . --target tests -j $(nproc)
+ # We don't have the ability to run these tests at runtime right now
+ #- name: Run tests
+ # run: |
+ # cd ../boost-root/__build__
+ # ctest --output-on-failure --no-tests=error
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7e7790271c..7965bd1ea9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,13 +5,17 @@ cmake_minimum_required(VERSION 3.5...3.16)
-project(boost_math VERSION "${BOOST_SUPERPROJECT_VERSION}" LANGUAGES CXX)
+project(boost_math VERSION 1.87.0 LANGUAGES CXX)
add_library(boost_math INTERFACE)
add_library(Boost::math ALIAS boost_math)
target_include_directories(boost_math INTERFACE include)
+if(NOT CMAKE_VERSION VERSION_LESS "3.19")
+ file(GLOB_RECURSE headers include/*.hpp)
+ target_sources(boost_math PRIVATE ${headers})
+endif()
include(CMakeDependentOption)
@@ -41,12 +45,17 @@ else()
endif()
+if(BUILD_TESTING AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt")
+
+ add_subdirectory(test)
+
# Only enable tests when we're the root project
-if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+elseif(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
include(CTest)
add_subdirectory(test)
include(GNUInstallDirs)
install(DIRECTORY "include/" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+
endif()
diff --git a/build.jam b/build.jam
new file mode 100644
index 0000000000..fb244e511d
--- /dev/null
+++ b/build.jam
@@ -0,0 +1,48 @@
+# Copyright René Ferdinand Rivera Morell 2023-2024
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt) + +require-b2 5.2 ; + +constant boost_dependencies : + /boost/assert//boost_assert + /boost/concept_check//boost_concept_check + /boost/config//boost_config + /boost/core//boost_core + /boost/integer//boost_integer + /boost/lexical_cast//boost_lexical_cast + /boost/predef//boost_predef + /boost/random//boost_random + /boost/static_assert//boost_static_assert + /boost/throw_exception//boost_throw_exception ; + +project /boost/math + : common-requirements + include + ; + +explicit + [ alias boost_math : : : : $(boost_dependencies) ] + [ alias boost_math_c99 : build//boost_math_c99 ] + [ alias boost_math_c99f : build//boost_math_c99f ] + [ alias boost_math_c99l : build//boost_math_c99l ] + [ alias boost_math_tr1 : build//boost_math_tr1 ] + [ alias boost_math_tr1f : build//boost_math_tr1f ] + [ alias boost_math_tr1l : build//boost_math_tr1l ] + [ alias all : + boost_math + boost_math_c99 boost_math_c99f boost_math_c99l + boost_math_tr1 boost_math_tr1f boost_math_tr1l + example test ] + [ alias testing : : : : + test + include_private ] + ; + +call-if : boost-library math + : install boost_math + boost_math_c99 boost_math_c99f boost_math_c99l + boost_math_tr1 boost_math_tr1f boost_math_tr1l + ; + diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index 6549e06b79..500c77363d 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -1,16 +1,18 @@ # copyright John Maddock 2008 -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt. import testing ; import pch ; -import ../../config/checks/config : requires ; +import-search /boost/config/checks ; +import config : requires ; -project - : requirements - intel-win:-nologo - intel-win:-nologo +project + : common-requirements $(boost_dependencies) + : requirements + intel-win:-nologo + intel-win:-nologo #intel-linux:off intel-darwin:off gcc,windows:off @@ -20,9 +22,11 @@ project [ check-target-builds ../config//has_gcc_visibility "gcc visibility" : gcc:-fvisibility=hidden : ] [ requires cxx11_noexcept cxx11_rvalue_references sfinae_expr cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_hdr_initializer_list cxx11_hdr_chrono cxx11_thread_local cxx11_constexpr cxx11_nullptr cxx11_numeric_limits cxx11_decltype cxx11_hdr_array cxx11_hdr_atomic cxx11_hdr_type_traits cxx11_allocator cxx11_explicit_conversion_operators ] [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction ] + : usage-requirements + BOOST_MATH_TR1_NO_LIB=1 ; -cpp-pch pch : ../src/tr1/pch.hpp : ../src/tr1 shared:BOOST_MATH_TR1_DYN_LINK=1 ; +cpp-pch pch : ../src/tr1/pch.hpp : ../src/tr1 shared:BOOST_MATH_TR1_DYN_LINK=1 ; C99_SOURCES = acosh asinh @@ -46,7 +50,7 @@ round tgamma trunc ; -TR1_SOURCES = +TR1_SOURCES = assoc_laguerre assoc_legendre beta @@ -80,22 +84,22 @@ import targets ; obj long_double_check : ../config/has_long_double_support.cpp ; explicit long_double_check ; - + # Library targets lib boost_math_tr1 : ../src/tr1/$(TR1_SOURCES).cpp pch - : + : shared:BOOST_MATH_TR1_DYN_LINK=1 ../src/tr1 ; lib boost_math_tr1f : ../src/tr1/$(TR1_SOURCES)f.cpp pch - : + : shared:BOOST_MATH_TR1_DYN_LINK=1 ../src/tr1 ; lib 
boost_math_tr1l : ../src/tr1/$(TR1_SOURCES)l.cpp pch - : + : shared:BOOST_MATH_TR1_DYN_LINK=1 ../config//has_long_double_support ../src/tr1 @@ -103,23 +107,21 @@ lib boost_math_tr1l : ../src/tr1/$(TR1_SOURCES)l.cpp pch ; lib boost_math_c99 : ../src/tr1/$(C99_SOURCES).cpp pch - : + : shared:BOOST_MATH_TR1_DYN_LINK=1 ../src/tr1 ; lib boost_math_c99f : ../src/tr1/$(C99_SOURCES)f.cpp pch - : + : shared:BOOST_MATH_TR1_DYN_LINK=1 ../src/tr1 ; lib boost_math_c99l : ../src/tr1/$(C99_SOURCES)l.cpp pch - : + : shared:BOOST_MATH_TR1_DYN_LINK=1 ../config//has_long_double_support ../src/tr1 [ check-target-builds ../config//has_long_double_support "long double support" : : no ] ; - -boost-install boost_math_c99 boost_math_c99f boost_math_c99l boost_math_tr1 boost_math_tr1f boost_math_tr1l ; diff --git a/config/Jamfile.v2 b/config/Jamfile.v2 index 77aca7c2e3..650e888809 100644 --- a/config/Jamfile.v2 +++ b/config/Jamfile.v2 @@ -9,11 +9,11 @@ import path ; local ntl-path = [ modules.peek : NTL_PATH ] ; local gmp_path = [ modules.peek : GMP_PATH ] ; -lib quadmath ; -lib fftw3 ; -lib fftw3f ; -lib fftw3l ; -lib fftw3q ; +searched-lib quadmath ; +searched-lib fftw3 ; +searched-lib fftw3f ; +searched-lib fftw3l ; +searched-lib fftw3q ; obj has_long_double_support : has_long_double_support.cpp ; obj has_mpfr_class : has_mpfr_class.cpp : diff --git a/doc/Jamfile.v2 b/doc/Jamfile.v2 index 71746726f5..511262d493 100644 --- a/doc/Jamfile.v2 +++ b/doc/Jamfile.v2 @@ -13,7 +13,7 @@ path-constant here : . ; constant here-url : [ regex.replace $(here) "\\\\" "/" ] ; xml math : math.qbk : - enable_index + enable_index __base_path__=$(here-url) pdf:__build_pdf html:__build_html @@ -25,7 +25,7 @@ boostbook standalone # Path for links to Boost: boost.root=../../../.. html.stylesheet=math.css - + # Some general style settings: table.footnote.number.format=1 footnote.number.format=1 @@ -46,11 +46,11 @@ boostbook standalone # Index on type: index.on.type=1 boost.noexpand.chapter.toc=1 - + #root.filename="sf_dist_and_tools" #graphicsize.extension=1 #use.extensions=1 - + # PDF Options: # TOC Generation: this is needed for FOP-0.9 and later: fop1.extensions=0 @@ -74,7 +74,7 @@ boostbook standalone # better use SVG's instead: pdf:admon.graphics.extension=".svg" pdf:admon.graphics.path=$(here)/../../../doc/src/images/ - pdf:use.role.for.mediaobject=1 + pdf:use.role.for.mediaobject=1 pdf:preferred.mediaobject.role=print pdf:img.src.path=$(images_location)/ pdf:draft.mode="no" @@ -82,7 +82,7 @@ boostbook standalone on pdf:off html:on $(here)/index.idx - $(here)/../../.. + $(here)/../include #on pdf:index.on.type=1 ; @@ -90,7 +90,7 @@ boostbook standalone install pdfinstall : standalone/pdf : . PDF math.pdf ; explicit pdfinstall ; # b2 pdf pdfinstall to do this pdf file copy. -install css_install : math.css : $(here)/html ; +install css_install : math.css : $(here)/html ; ############################################################################### alias boostdoc ; diff --git a/doc/constants/constants.qbk b/doc/constants/constants.qbk index 24092adf56..9cce152da1 100644 --- a/doc/constants/constants.qbk +++ b/doc/constants/constants.qbk @@ -227,6 +227,11 @@ either construct from a decimal digit string or calculate on the fly depending u [[Any other value ['N]][Sets the compile time precision to ['N] bits.]] ] +[h5 GPU Support] + +All Boost.Math constants are marked with `BOOST_MATH_GPU_ENABLED` and can be used on both host and device. +Note that when running on device you are limited to using only `float` and `double` types. 
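As an illustrative sketch (not part of this patch): with a GPU-enabled build, a constant can be used inside a function that is compiled for both host and device, for example:

    #include <boost/math/constants/constants.hpp>

    // Callable from host or device code when Boost.Math GPU support is enabled;
    // on device, only the float and double specializations are usable.
    BOOST_MATH_GPU_ENABLED double circle_area(double r)
    {
       return boost::math::constants::pi<double>() * r * r;
    }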
+ [h5 Custom Specializing a constant] In addition, for user-defined types that need special handling, it's possible to partially-specialize diff --git a/doc/distributions/arcsine.qbk b/doc/distributions/arcsine.qbk index fbd6e86b1e..7930f97d5a 100644 --- a/doc/distributions/arcsine.qbk +++ b/doc/distributions/arcsine.qbk @@ -21,11 +21,11 @@ typedef Policy policy_type; // Constructor from two range parameters, x_min and x_max: - arcsine_distribution(RealType x_min = 0, RealType x_max = 1); + BOOST_MATH_GPU_ENABLED arcsine_distribution(RealType x_min = 0, RealType x_max = 1); // Range Parameter accessors: - RealType x_min() const; - RealType x_max() const; + BOOST_MATH_GPU_ENABLED RealType x_min() const; + BOOST_MATH_GPU_ENABLED RealType x_max() const; }; }} // namespaces @@ -103,8 +103,8 @@ constructs a 'Standard 01' arcsine distribution. [h5 Parameter Accessors] - RealType x_min() const; - RealType x_max() const; + BOOST_MATH_GPU_ENABLED RealType x_min() const; + BOOST_MATH_GPU_ENABLED RealType x_max() const; Return the parameter ['x_min] or ['x_max] from which this distribution was constructed. @@ -116,6 +116,8 @@ So, for example: All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The formulae for calculating these are shown in the table below, and at [@http://mathworld.wolfram.com/arcsineDistribution.html Wolfram Mathworld]. diff --git a/doc/distributions/bernoulli.qbk b/doc/distributions/bernoulli.qbk index 4a2fc7b618..719c42cd9e 100644 --- a/doc/distributions/bernoulli.qbk +++ b/doc/distributions/bernoulli.qbk @@ -16,9 +16,9 @@ typedef RealType value_type; typedef Policy policy_type; - bernoulli_distribution(RealType p); // Constructor. + BOOST_MATH_GPU_ENABLED bernoulli_distribution(RealType p); // Constructor. // Accessor function. - RealType success_fraction() const + BOOST_MATH_GPU_ENABLED RealType success_fraction() const // Probability of success (as a fraction). }; }} // namespaces @@ -51,12 +51,12 @@ and the [@http://en.wikipedia.org/wiki/Cumulative_Distribution_Function Cumulati [h4 Member Functions] - bernoulli_distribution(RealType p); + BOOST_MATH_GPU_ENABLED bernoulli_distribution(RealType p); Constructs a [@http://en.wikipedia.org/wiki/bernoulli_distribution bernoulli distribution] with success_fraction /p/. - RealType success_fraction() const + BOOST_MATH_GPU_ENABLED RealType success_fraction() const Returns the /success_fraction/ parameter of this distribution. @@ -64,6 +64,8 @@ Returns the /success_fraction/ parameter of this distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variable is 0 and 1, and the useful supported range is only 0 or 1. 
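For example (a host-side sketch, not part of this patch), the generic non-member accessors apply to the Bernoulli distribution in the usual way:

    #include <boost/math/distributions/bernoulli.hpp>

    boost::math::bernoulli_distribution<double> dist(0.25); // success fraction p
    double p1 = boost::math::pdf(dist, 1);   // P(X = 1) = 0.25
    double m  = boost::math::mean(dist);     // p = 0.25
    double v  = boost::math::variance(dist); // p * (1 - p) = 0.1875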
diff --git a/doc/distributions/beta.qbk b/doc/distributions/beta.qbk index 95943f715d..5ba1a6d1cc 100644 --- a/doc/distributions/beta.qbk +++ b/doc/distributions/beta.qbk @@ -19,30 +19,30 @@ typedef RealType value_type; typedef Policy policy_type; // Constructor from two shape parameters, alpha & beta: - beta_distribution(RealType a, RealType b); + BOOST_MATH_GPU_ENABLED beta_distribution(RealType a, RealType b); // Parameter accessors: - RealType alpha() const; - RealType beta() const; + BOOST_MATH_GPU_ENABLED RealType alpha() const; + BOOST_MATH_GPU_ENABLED RealType beta() const; // Parameter estimators of alpha or beta from mean and variance. - static RealType find_alpha( + BOOST_MATH_GPU_ENABLED static RealType find_alpha( RealType mean, // Expected value of mean. RealType variance); // Expected value of variance. - static RealType find_beta( + BOOST_MATH_GPU_ENABLED static RealType find_beta( RealType mean, // Expected value of mean. RealType variance); // Expected value of variance. // Parameter estimators from // either alpha or beta, and x and probability. - static RealType find_alpha( + BOOST_MATH_GPU_ENABLED static RealType find_alpha( RealType beta, // from beta. RealType x, // x. RealType probability); // cdf - static RealType find_beta( + BOOST_MATH_GPU_ENABLED static RealType find_beta( RealType alpha, // alpha. RealType x, // probability x. RealType probability); // probability cdf. @@ -98,7 +98,7 @@ whose apex is away from the centre (where x = half). [h5 Constructor] - beta_distribution(RealType alpha, RealType beta); + BOOST_MATH_GPU_ENABLED beta_distribution(RealType alpha, RealType beta); Constructs a beta distribution with shape parameters /alpha/ and /beta/. @@ -117,11 +117,11 @@ in the graph above). [h5 Parameter Accessors] - RealType alpha() const; + BOOST_MATH_GPU_ENABLED RealType alpha() const; Returns the parameter /alpha/ from which this distribution was constructed. - RealType beta() const; + BOOST_MATH_GPU_ENABLED RealType beta() const; Returns the parameter /beta/ from which this distribution was constructed. @@ -182,6 +182,8 @@ Returns the value of [beta] that gives: All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The formulae for calculating these are shown in the table below, and at [@http://mathworld.wolfram.com/BetaDistribution.html Wolfram Mathworld]. diff --git a/doc/distributions/cauchy.qbk b/doc/distributions/cauchy.qbk index 6ae090818a..e59e3760ed 100644 --- a/doc/distributions/cauchy.qbk +++ b/doc/distributions/cauchy.qbk @@ -15,10 +15,10 @@ typedef RealType value_type; typedef Policy policy_type; - cauchy_distribution(RealType location = 0, RealType scale = 1); + BOOST_MATH_GPU_ENABLED cauchy_distribution(RealType location = 0, RealType scale = 1); - RealType location()const; - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType location()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; }; The [@http://en.wikipedia.org/wiki/Cauchy_distribution Cauchy-Lorentz distribution] @@ -53,7 +53,7 @@ the distribution: [h4 Member Functions] - cauchy_distribution(RealType location = 0, RealType scale = 1); + BOOST_MATH_GPU_ENABLED cauchy_distribution(RealType location = 0, RealType scale = 1); Constructs a Cauchy distribution, with location parameter /location/ and scale parameter /scale/. 
When these parameters take their default @@ -62,11 +62,11 @@ then the result is a Standard Cauchy Distribution. Requires scale > 0, otherwise calls __domain_error. - RealType location()const; + BOOST_MATH_GPU_ENABLED RealType location()const; Returns the location parameter of the distribution. - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; Returns the scale parameter of the distribution. @@ -74,6 +74,8 @@ Returns the scale parameter of the distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. Note however that the Cauchy distribution does not have a mean, standard deviation, etc. See __math_undefined @@ -116,7 +118,7 @@ So recall that for `x < 0`: Substituting into the above we get: -[expression p = -atan(1/x) ; x < 0] +[expression p = -atan(1/x) / [pi] ; x < 0] So the procedure is to calculate the cdf for -fabs(x) using the above formula. Note that to factor in the location and scale diff --git a/doc/distributions/chi_squared.qbk b/doc/distributions/chi_squared.qbk index 753e1f401d..b52d4d392d 100644 --- a/doc/distributions/chi_squared.qbk +++ b/doc/distributions/chi_squared.qbk @@ -18,13 +18,13 @@ typedef Policy policy_type; // Constructor: - chi_squared_distribution(RealType i); + BOOST_MATH_GPU_ENABLED chi_squared_distribution(RealType i); // Accessor to parameter: - RealType degrees_of_freedom()const; + BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const; // Parameter estimation: - static RealType find_degrees_of_freedom( + BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom( RealType difference_from_mean, RealType alpha, RealType beta, @@ -104,6 +104,8 @@ See also section on Sample sizes required in All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. 
(We have followed the usual restriction of the mode to degrees of freedom >= 2,
but note that the maximum of the pdf is actually zero for degrees of freedom from 2 down to 0,
diff --git a/doc/distributions/dist_reference.qbk b/doc/distributions/dist_reference.qbk
index c225d1953e..2dd06bcb8b 100644
--- a/doc/distributions/dist_reference.qbk
+++ b/doc/distributions/dist_reference.qbk
@@ -16,15 +16,18 @@
[include fisher.qbk]
[include gamma.qbk]
[include geometric.qbk]
+[include holtsmark.qbk]
[include hyperexponential.qbk]
[include hypergeometric.qbk]
[include inverse_chi_squared.qbk]
[include inverse_gamma.qbk]
[include inverse_gaussian.qbk]
[include kolmogorov_smirnov.qbk]
+[include landau.qbk]
[include laplace.qbk]
[include logistic.qbk]
[include lognormal.qbk]
+[include mapairy.qbk]
[include negative_binomial.qbk]
[include nc_beta.qbk]
[include nc_chi_squared.qbk]
@@ -34,6 +37,7 @@
[include pareto.qbk]
[include poisson.qbk]
[include rayleigh.qbk]
+[include saspoint5.qbk]
[include skew_normal.qbk]
[include students_t.qbk]
[include triangular.qbk]
diff --git a/doc/distributions/dist_tutorial.qbk b/doc/distributions/dist_tutorial.qbk
index ee28dbdca5..80e8e2e458 100644
--- a/doc/distributions/dist_tutorial.qbk
+++ b/doc/distributions/dist_tutorial.qbk
@@ -128,12 +128,49 @@ And quantiles are just the same:
quantile(my_dist, p); // Returns the value of the random variable x
// such that cdf(my_dist, x) == p.
+As is the logcdf (Natural log of the Cumulative Distribution Function):
+
+ logcdf(my_dist, x); // Returns logcdf at point x of distribution my_dist.
+
+And the logpdf (Natural log of the Probability Density Function):
+
+ logpdf(my_dist, x); // Returns logpdf at point x of distribution my_dist.
+
If you're wondering why these aren't member functions, it's to make the
library more easily extensible: if you want to add additional generic
operations - let's say the /n'th moment/ - then all you have to do
is add the appropriate non-member functions, overloaded for each
implemented distribution type.
+The logcdf and logpdf functions are, at a minimum, calculated as log(cdf(my_dist, x))
+and log(pdf(my_dist, x)) respectively. The following distributions have specialized
+implementations of the logcdf:
+
+* Exponential
+* Extreme Value
+* Geometric
+* Laplace
+* Logistic
+* Pareto
+* Rayleigh
+* Weibull
+
+And the following distributions have specialized implementations of logpdf:
+
+* Exponential
+* Extreme Value
+* Gamma
+* Inverse Gamma
+* Inverse Gaussian
+* Laplace
+* Normal
+* Poisson
+* Rayleigh
+* Weibull
+
+The specialized implementations listed above allow a higher degree of precision
+than can be obtained through the naive generic method.
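As an illustrative sketch (not part of this patch), the payoff is easiest to see far out in a tail, where the naive form underflows but the specialized one does not:

    #include <boost/math/distributions/exponential.hpp>
    #include <cmath>

    boost::math::exponential_distribution<double> dist(2.0); // lambda = 2
    // Specialized implementation: log(2) - 2 * 400, about -799.3, computed directly.
    double lp = boost::math::logpdf(dist, 400.0);
    // Naive equivalent: pdf underflows to zero, so the log is -infinity.
    double naive = std::log(boost::math::pdf(dist, 400.0));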
+
[tip
[*Random numbers that approximate Quantiles of Distributions]
diff --git a/doc/distributions/exponential.qbk b/doc/distributions/exponential.qbk
index 043818b4a4..3c90a96483 100644
--- a/doc/distributions/exponential.qbk
+++ b/doc/distributions/exponential.qbk
@@ -15,9 +15,9 @@
typedef RealType value_type;
typedef Policy policy_type;
- exponential_distribution(RealType lambda = 1);
+ BOOST_MATH_GPU_ENABLED exponential_distribution(RealType lambda = 1);
- RealType lambda()const;
+ BOOST_MATH_GPU_ENABLED RealType lambda()const;
};
@@ -37,7 +37,7 @@ values of the rate parameter lambda:
[h4 Member Functions]
- exponential_distribution(RealType lambda = 1);
+ BOOST_MATH_GPU_ENABLED exponential_distribution(RealType lambda = 1);
Constructs an
[@http://en.wikipedia.org/wiki/Exponential_distribution Exponential distribution]
@@ -46,7 +46,7 @@ Lambda is defined as the reciprocal of the scale parameter.
Requires lambda > 0, otherwise calls __domain_error.
- RealType lambda()const;
+ BOOST_MATH_GPU_ENABLED RealType lambda()const;
Accessor function returns the lambda parameter of the distribution.
@@ -54,9 +54,14 @@ Accessor function returns the lambda parameter of the distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0, +[infin]\].
+In this distribution the implementations of both `logcdf` and `logpdf` are specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
The exponential distribution is implemented in terms of the
@@ -71,7 +76,9 @@ In the following table [lambda] is the parameter lambda of the distribution,
[table
[[Function][Implementation Notes]]
[[pdf][Using the relation: pdf = [lambda] * exp(-[lambda] * x) ]]
+[[logpdf][log(pdf) = log([lambda]) - [lambda] * x ]]
[[cdf][Using the relation: p = 1 - exp(-x * [lambda]) = -expm1(-x * [lambda]) ]]
+[[logcdf][log(cdf) = log1p(-exp(-x * [lambda])) ]]
[[cdf complement][Using the relation: q = exp(-x * [lambda]) ]]
[[quantile][Using the relation: x = -log(1-p) / [lambda] = -log1p(-p) / [lambda]]]
[[quantile from the complement][Using the relation: x = -log(q) / [lambda]]]
diff --git a/doc/distributions/extreme_value.qbk b/doc/distributions/extreme_value.qbk
index 314917ebc1..f47467d2bd 100644
--- a/doc/distributions/extreme_value.qbk
+++ b/doc/distributions/extreme_value.qbk
@@ -14,10 +14,10 @@
public:
typedef RealType value_type;
- extreme_value_distribution(RealType location = 0, RealType scale = 1);
+ BOOST_MATH_GPU_ENABLED extreme_value_distribution(RealType location = 0, RealType scale = 1);
- RealType scale()const;
- RealType location()const;
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
+ BOOST_MATH_GPU_ENABLED RealType location()const;
};
There are various
@@ -59,18 +59,18 @@ And this graph illustrates how the PDF varies with the shape parameter:
[h4 Member Functions]
- extreme_value_distribution(RealType location = 0, RealType scale = 1);
+ BOOST_MATH_GPU_ENABLED extreme_value_distribution(RealType location = 0, RealType scale = 1);
Constructs an Extreme Value distribution with the specified location and scale
parameters.
Requires `scale > 0`, otherwise calls __domain_error.
- RealType location()const;
+ BOOST_MATH_GPU_ENABLED RealType location()const;
Returns the location parameter of the distribution.
- RealType scale()const;
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
Returns the scale parameter of the distribution.
@@ -78,9 +78,14 @@ Returns the scale parameter of the distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random parameter is \[-[infin], +[infin]\].
+In this distribution the implementations of both `logcdf` and `logpdf` are specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
The extreme value distribution is implemented in terms of the
@@ -96,7 +101,9 @@ In the following table:
[table
[[Function][Implementation Notes]]
[[pdf][Using the relation: pdf = exp((a-x)/b) * exp(-exp((a-x)/b)) / b ]]
+[[logpdf][log(pdf) = log(1/b) + e - exp(e), where e = (a-x)/b ]]
[[cdf][Using the relation: p = exp(-exp((a-x)/b)) ]]
+[[logcdf][log(cdf) = -exp((a-x)/b) ]]
[[cdf complement][Using the relation: q = -expm1(-exp((a-x)/b)) ]]
[[quantile][Using the relation: a - log(-log(p)) * b]]
[[quantile from the complement][Using the relation: a - log(-log1p(-q)) * b]]
diff --git a/doc/distributions/fisher.qbk b/doc/distributions/fisher.qbk
index 80c9a9b29b..9b3a55f59d 100644
--- a/doc/distributions/fisher.qbk
+++ b/doc/distributions/fisher.qbk
@@ -17,11 +17,11 @@
typedef RealType value_type;
// Construct:
- fisher_f_distribution(const RealType& i, const RealType& j);
+ BOOST_MATH_GPU_ENABLED fisher_f_distribution(const RealType& i, const RealType& j);
// Accessors:
- RealType degrees_of_freedom1()const;
- RealType degrees_of_freedom2()const;
+ BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const;
+ BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const;
};
}} //namespaces
@@ -46,7 +46,7 @@ two degrees of freedom parameters.
[h4 Member Functions]
- fisher_f_distribution(const RealType& df1, const RealType& df2);
+ BOOST_MATH_GPU_ENABLED fisher_f_distribution(const RealType& df1, const RealType& df2);
Constructs an F-distribution with numerator degrees of freedom /df1/
and denominator degrees of freedom /df2/.
@@ -54,11 +54,11 @@ and denominator degrees of freedom /df2/.
Requires that /df1/ and /df2/ are both greater than zero, otherwise __domain_error
is called.
- RealType degrees_of_freedom1()const;
+ BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const;
Returns the numerator degrees of freedom parameter of the distribution.
- RealType degrees_of_freedom2()const;
+ BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const;
Returns the denominator degrees of freedom parameter of the distribution.
@@ -66,6 +66,8 @@ Returns the denominator degrees of freedom parameter of the distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0, +[infin]\].
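As a host-side sketch (not part of this patch), the accessors combine with the usual machinery in the familiar way, for example to obtain a one-sided F-test p-value:

    #include <boost/math/distributions/fisher_f.hpp>

    boost::math::fisher_f_distribution<double> f(4, 20); // df1 = 4, df2 = 20
    double df1 = f.degrees_of_freedom1();                // 4
    // p-value for an observed F statistic of 2.8:
    double p = boost::math::cdf(boost::math::complement(f, 2.8));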
diff --git a/doc/distributions/gamma.qbk b/doc/distributions/gamma.qbk index eefcc84a0c..dd34ed2fc0 100644 --- a/doc/distributions/gamma.qbk +++ b/doc/distributions/gamma.qbk @@ -12,10 +12,10 @@ typedef RealType value_type; typedef Policy policy_type; - gamma_distribution(RealType shape, RealType scale = 1) + BOOST_MATH_GPU_ENABLED gamma_distribution(RealType shape, RealType scale = 1) - RealType shape()const; - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType shape()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; }; }} // namespaces @@ -76,7 +76,7 @@ a dedicated Erlang Distribution. [h4 Member Functions] - gamma_distribution(RealType shape, RealType scale = 1); + BOOST_MATH_GPU_ENABLED gamma_distribution(RealType shape, RealType scale = 1); Constructs a gamma distribution with shape /shape/ and scale /scale/. @@ -84,11 +84,11 @@ scale /scale/. Requires that the shape and scale parameters are greater than zero, otherwise calls __domain_error. - RealType shape()const; + BOOST_MATH_GPU_ENABLED RealType shape()const; Returns the /shape/ parameter of this distribution. - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; Returns the /scale/ parameter of this distribution. @@ -96,9 +96,14 @@ Returns the /scale/ parameter of this distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variable is \[0,+[infin]\]. +In this distribution the implementation of `logpdf` is specialized +to improve numerical accuracy. + [h4 Accuracy] The gamma distribution is implemented in terms of the @@ -115,6 +120,7 @@ and /q = 1-p/. [table [[Function][Implementation Notes]] [[pdf][Using the relation: pdf = __gamma_p_derivative(k, x / [theta]) / [theta] ]] +[[logpdf][log(pdf) = -k*log([theta]) + (k-1)*log(x) - lgamma(k) - (x/[theta]) ]] [[cdf][Using the relation: p = __gamma_p(k, x / [theta]) ]] [[cdf complement][Using the relation: q = __gamma_q(k, x / [theta]) ]] [[quantile][Using the relation: x = [theta] * __gamma_p_inv(k, p) ]] diff --git a/doc/distributions/geometric.qbk b/doc/distributions/geometric.qbk index 7aa1a33439..2a4357a2a5 100644 --- a/doc/distributions/geometric.qbk +++ b/doc/distributions/geometric.qbk @@ -17,28 +17,28 @@ typedef RealType value_type; typedef Policy policy_type; // Constructor from success_fraction: - geometric_distribution(RealType p); + BOOST_MATH_GPU_ENABLED geometric_distribution(RealType p); // Parameter accessors: - RealType success_fraction() const; - RealType successes() const; + BOOST_MATH_GPU_ENABLED RealType success_fraction() const; + BOOST_MATH_GPU_ENABLED RealType successes() const; // Bounds on success fraction: - static RealType find_lower_bound_on_p( + BOOST_MATH_GPU_ENABLED static RealType find_lower_bound_on_p( RealType trials, RealType successes, RealType probability); // alpha - static RealType find_upper_bound_on_p( + BOOST_MATH_GPU_ENABLED static RealType find_upper_bound_on_p( RealType trials, RealType successes, RealType probability); // alpha // Estimate min/max number of trials: - static RealType find_minimum_number_of_trials( + BOOST_MATH_GPU_ENABLED static RealType find_minimum_number_of_trials( RealType k, // Number of failures. RealType p, // Success fraction. RealType probability); // Probability threshold alpha. 
- static RealType find_maximum_number_of_trials(
+ BOOST_MATH_GPU_ENABLED static RealType find_maximum_number_of_trials(
RealType k, // Number of failures.
RealType p, // Success fraction.
RealType probability); // Probability threshold alpha.
@@ -268,6 +268,8 @@ of observing more than k failures.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
However it's worth taking a moment to define what these actually mean in
the context of this distribution:
@@ -303,6 +305,9 @@ the context of this distribution:
``quantile(complement(geometric(p), P))``]]
]
+In this distribution the implementation of `logcdf` is specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
This distribution is implemented using the pow and exp functions, so most results
@@ -322,6 +327,7 @@ the expected number of failures using the quantile.
[[Function][Implementation Notes]]
[[pdf][pdf = p * pow(q, k)]]
[[cdf][cdf = 1 - q[super k+1]]]
+[[logcdf][log(cdf) = log1p(-exp(log1p(-p) * (k+1)))]]
[[cdf complement][exp(log1p(-p) * (k+1))]]
[[quantile][k = log1p(-x) / log1p(-p) -1]]
[[quantile from the complement][k = log(x) / log1p(-p) -1]]
diff --git a/doc/distributions/holtsmark.qbk b/doc/distributions/holtsmark.qbk
new file mode 100644
index 0000000000..39c42ff133
--- /dev/null
+++ b/doc/distributions/holtsmark.qbk
@@ -0,0 +1,118 @@
+[section:holtsmark_dist Holtsmark Distribution]
+
+``#include <boost/math/distributions/holtsmark.hpp>``
+
+ template <class RealType = double, class Policy = policies::policy<> >
+ class holtsmark_distribution;
+
+ typedef holtsmark_distribution<> holtsmark;
+
+ template <class RealType = double, class Policy = policies::policy<> >
+ class holtsmark_distribution
+ {
+ public:
+ typedef RealType value_type;
+ typedef Policy policy_type;
+
+ BOOST_MATH_GPU_ENABLED holtsmark_distribution(RealType location = 0, RealType scale = 1);
+
+ BOOST_MATH_GPU_ENABLED RealType location()const;
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
+ };
+
+The [@http://en.wikipedia.org/wiki/holtsmark_distribution Holtsmark distribution]
+is named after Johan Peter Holtsmark.
+It is a special case of a [@http://en.wikipedia.org/wiki/Stable_distribution stable distribution]
+with shape parameter [alpha]=3/2, [beta]=0.
+
+Its [@http://en.wikipedia.org/wiki/Probability_distribution probability distribution function PDF] is
+given by:
+
+[equation holtsmark_ref1] [/f(x; \mu, c)=\frac{1}{2 \pi} \int_{-\infty}^{\infty} \exp( i t \mu - |c t|^{3/2} ) e^{-i x t} dt]
+
+The location parameter [mu] is the location of the distribution,
+while the scale parameter [c] determines the width of the distribution.
+If the location is
+zero, and the scale 1, then the result is a standard holtsmark
+distribution.
+
+The distribution is used especially in astrophysics for modeling gravitational bodies.
+
+The following graph shows how the distribution moves as the
+location parameter changes:
+
+[graph holtsmark_pdf1]
+
+While the following graph shows how the shape (scale) parameter alters
+the distribution:
+
+[graph holtsmark_pdf2]
+
+[h4 Member Functions]
+
+ BOOST_MATH_GPU_ENABLED holtsmark_distribution(RealType location = 0, RealType scale = 1);
+
+Constructs a holtsmark distribution, with location parameter /location/
+and scale parameter /scale/. When these parameters take their default
+values (location = 0, scale = 1)
+then the result is a Standard holtsmark Distribution.
+
+Requires scale > 0, otherwise calls __domain_error.
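A brief usage sketch (illustrative values, not part of this patch), assuming the header path shown in the synopsis above:

    #include <boost/math/distributions/holtsmark.hpp>

    boost::math::holtsmark_distribution<double> h(0.0, 1.0); // standard Holtsmark
    double d = boost::math::pdf(h, 0.5);
    double p = boost::math::cdf(h, 0.5);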
+
+ BOOST_MATH_GPU_ENABLED RealType location()const;
+
+Returns the location parameter of the distribution.
+
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
+
+Returns the scale parameter of the distribution.
+
+[h4 Non-member Accessors]
+
+All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
+that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
+
+Note however that the holtsmark distribution does not have a skewness,
+kurtosis, etc. See __math_undefined
+[/link math_toolkit.pol_ref.assert_undefined mathematically undefined function]
+to control whether these should fail to compile with a BOOST_STATIC_ASSERTION_FAILURE,
+which is the default.
+
+Alternately, the functions __skewness, __kurtosis and __kurtosis_excess will all
+return a __domain_error if called.
+
+The domain of the random variable is \[-[max_value], +[max_value]\].
+
+[h4 Accuracy]
+
+The error is within 4 epsilon.
+
+Errors in the PDF at 64-bit double precision:
+
+[$../graphs/holtsmark_pdf_accuracy_64.png]
+
+Errors in the CDF-complement at 64-bit double precision:
+
+[$../graphs/holtsmark_ccdf_accuracy_64.png]
+
+[h4 Implementation]
+
+See references.
+
+[h4 References]
+
+* [@http://en.wikipedia.org/wiki/holtsmark_distribution Holtsmark Distribution]
+* T. Yoshimura, Numerical Evaluation and High Precision Approximation Formula for Holtsmark Distribution,
+DOI: 10.36227/techrxiv.172054657.73020014/v1, 2024.
+
+[endsect][/section:holtsmark_dist holtsmark]
+
+[/ holtsmark.qbk
+ Copyright Takuma Yoshimura 2024.
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt).
+]
diff --git a/doc/distributions/inverse_chi_squared.qbk b/doc/distributions/inverse_chi_squared.qbk
index 7bc75a8813..8d67082d07 100644
--- a/doc/distributions/inverse_chi_squared.qbk
+++ b/doc/distributions/inverse_chi_squared.qbk
@@ -12,11 +12,11 @@
typedef RealType value_type;
typedef Policy policy_type;
- inverse_chi_squared_distribution(RealType df = 1); // Not explicitly scaled, default 1/df.
- inverse_chi_squared_distribution(RealType df, RealType scale = 1/df); // Scaled.
+ BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df = 1); // Not explicitly scaled, default 1/df.
+ BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df, RealType scale = 1/df); // Scaled.
- RealType degrees_of_freedom()const; // Default 1.
- RealType scale()const; // Optional scale [xi] (variance), default 1/degrees_of_freedom.
+ BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const; // Default 1.
+ BOOST_MATH_GPU_ENABLED RealType scale()const; // Optional scale [xi] (variance), default 1/degrees_of_freedom.
};
}} // namespace boost // namespace math
@@ -99,8 +99,8 @@ varies for a few values of parameters [nu] and [xi]:
[h4 Member Functions]
- inverse_chi_squared_distribution(RealType df = 1); // Implicitly scaled 1/df.
- inverse_chi_squared_distribution(RealType df = 1, RealType scale); // Explicitly scaled.
+ BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df = 1); // Implicitly scaled 1/df.
+ BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df, RealType scale = 1/df); // Explicitly scaled.
Constructs an inverse chi_squared distribution with [nu] degrees of freedom ['df],
and scale ['scale] with default value 1\/df.
@@ -108,11 +108,11 @@ and scale ['scale] with default value 1\/df. Requires that the degrees of freedom [nu] parameter is greater than zero, otherwise calls __domain_error. - RealType degrees_of_freedom()const; + BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const; Returns the degrees_of_freedom [nu] parameter of this distribution. - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; Returns the scale [xi] parameter of this distribution. @@ -120,6 +120,8 @@ Returns the scale [xi] parameter of this distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variate is \[0,+[infin]\]. [note Unlike some definitions, this implementation supports a random variate diff --git a/doc/distributions/inverse_gamma.qbk b/doc/distributions/inverse_gamma.qbk index 8fccbc19c4..f657ec31b4 100644 --- a/doc/distributions/inverse_gamma.qbk +++ b/doc/distributions/inverse_gamma.qbk @@ -12,10 +12,10 @@ typedef RealType value_type; typedef Policy policy_type; - inverse_gamma_distribution(RealType shape, RealType scale = 1) + BOOST_MATH_GPU_ENABLED inverse_gamma_distribution(RealType shape, RealType scale = 1) - RealType shape()const; - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType shape()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; }; }} // namespaces @@ -63,18 +63,18 @@ varies as the parameters vary: [h4 Member Functions] - inverse_gamma_distribution(RealType shape = 1, RealType scale = 1); + BOOST_MATH_GPU_ENABLED inverse_gamma_distribution(RealType shape = 1, RealType scale = 1); Constructs an inverse gamma distribution with shape [alpha] and scale [beta]. Requires that the shape and scale parameters are greater than zero, otherwise calls __domain_error. - RealType shape()const; + BOOST_MATH_GPU_ENABLED RealType shape()const; Returns the [alpha] shape parameter of this inverse gamma distribution. - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; Returns the [beta] scale parameter of this inverse gamma distribution. @@ -82,11 +82,16 @@ Returns the [beta] scale parameter of this inverse gamma distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variate is \[0,+[infin]\]. [note Unlike some definitions, this implementation supports a random variate equal to zero as a special case, returning zero for pdf and cdf.] +In this distribution the implementation of `logpdf` is specialized +to improve numerical accuracy. + [h4 Accuracy] The inverse gamma distribution is implemented in terms of the @@ -99,12 +104,13 @@ But in general, inverse_gamma results are accurate to a few epsilon, [h4 Implementation] In the following table [alpha] is the shape parameter of the distribution, -[alpha] is its scale parameter, /x/ is the random variate, /p/ is the probability +[beta] is its scale parameter, /x/ is the random variate, /p/ is the probability and /q = 1-p/. 
[table [[Function][Implementation Notes]] [[pdf][Using the relation: pdf = __gamma_p_derivative([alpha], [beta]/ x, [beta]) / x * x ]] +[[logpdf][log(pdf) = [alpha] * log([beta]) + (-[alpha]-1)*log(x) - lgamma([alpha]) - ([beta]/x) ]] [[cdf][Using the relation: p = __gamma_q([alpha], [beta] / x) ]] [[cdf complement][Using the relation: q = __gamma_p([alpha], [beta] / x) ]] [[quantile][Using the relation: x = [beta]/ __gamma_q_inv([alpha], p) ]] diff --git a/doc/distributions/inverse_gaussian.qbk b/doc/distributions/inverse_gaussian.qbk index c5b824385f..99ca4d7c25 100644 --- a/doc/distributions/inverse_gaussian.qbk +++ b/doc/distributions/inverse_gaussian.qbk @@ -12,11 +12,11 @@ typedef RealType value_type; typedef Policy policy_type; - inverse_gaussian_distribution(RealType mean = 1, RealType scale = 1); + BOOST_MATH_GPU_ENABLED inverse_gaussian_distribution(RealType mean = 1, RealType scale = 1); - RealType mean()const; // mean default 1. - RealType scale()const; // Optional scale, default 1 (unscaled). - RealType shape()const; // Shape = scale/mean. + BOOST_MATH_GPU_ENABLED RealType mean()const; // mean default 1. + BOOST_MATH_GPU_ENABLED RealType scale()const; // Optional scale, default 1 (unscaled). + BOOST_MATH_GPU_ENABLED RealType shape()const; // Shape = scale/mean. }; typedef inverse_gaussian_distribution inverse_gaussian; @@ -90,7 +90,7 @@ Another related parameterisation, the __wald_distrib (where mean [mu] is unity) [h4 Member Functions] - inverse_gaussian_distribution(RealType df = 1, RealType scale = 1); // optionally scaled. + BOOST_MATH_GPU_ENABLED inverse_gaussian_distribution(RealType df = 1, RealType scale = 1); // optionally scaled. Constructs an inverse_gaussian distribution with [mu] mean, and scale [lambda], with both default values 1. @@ -98,11 +98,11 @@ and scale [lambda], with both default values 1. Requires that both the mean [mu] parameter and scale [lambda] are greater than zero, otherwise calls __domain_error. - RealType mean()const; + BOOST_MATH_GPU_ENABLED RealType mean()const; Returns the mean [mu] parameter of this distribution. - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; Returns the scale [lambda] parameter of this distribution. @@ -110,11 +110,16 @@ Returns the scale [lambda] parameter of this distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variate is \[0,+[infin]). [note Unlike some definitions, this implementation supports a random variate equal to zero as a special case, returning zero for both pdf and cdf.] +In this distribution the implementation of `logpdf` is specialized +to improve numerical accuracy. + [h4 Accuracy] The inverse_gaussian distribution is implemented in terms of the @@ -134,6 +139,7 @@ are used for the inverse gaussian function. [table [[Function] [Implementation Notes] ] [[pdf] [ [sqrt]([lambda]/ 2[pi]x[super 3]) e[super -[lambda](x - [mu])[sup2]/ 2[mu][sup2]x]]] +[[logpdf] [log(pdf) = (-[lambda]*pow([mu]-x, 2)/(x*[mu][super 2]) + log([lambda]) - 3*log(x) - log(2*[pi])) / 2 ]] [[cdf][ [Phi]{[sqrt]([lambda]/x) (x/[mu]-1)} + e[super 2[mu]/[lambda]] [Phi]{-[sqrt]([lambda]/[mu]) (1+x/[mu])} ]] [[cdf complement] [using complement of [Phi] above.] ] [[quantile][No closed form known. 
Estimated using a guess refined by Newton-Raphson iteration.]]
diff --git a/doc/distributions/landau.qbk b/doc/distributions/landau.qbk
new file mode 100644
index 0000000000..90dced0aa8
--- /dev/null
+++ b/doc/distributions/landau.qbk
@@ -0,0 +1,131 @@
+[section:landau_dist Landau Distribution]
+
+``#include <boost/math/distributions/landau.hpp>``
+
+ template <class RealType = double, class Policy = policies::policy<> >
+ class landau_distribution;
+
+ typedef landau_distribution<> landau;
+
+ template <class RealType = double, class Policy = policies::policy<> >
+ class landau_distribution
+ {
+ public:
+ typedef RealType value_type;
+ typedef Policy policy_type;
+
+ BOOST_MATH_GPU_ENABLED landau_distribution(RealType location = 0, RealType scale = 1);
+
+ BOOST_MATH_GPU_ENABLED RealType location()const;
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
+ BOOST_MATH_GPU_ENABLED RealType bias()const;
+ };
+
+The [@http://en.wikipedia.org/wiki/landau_distribution Landau distribution]
+is named after Lev Landau.
+It is a special case of a [@http://en.wikipedia.org/wiki/Stable_distribution stable distribution]
+with shape parameter [alpha]=1, [beta]=1.
+
+Its [@http://en.wikipedia.org/wiki/Probability_distribution probability distribution function PDF] is
+given by:
+
+[equation landau_ref1] [/f(x; \mu, c)=\frac{1}{\pi c} \int_{0}^{\infty} \exp(-t) \cos \left( t \left( \frac{x-\mu}{c}\right) + \frac{2t}{\pi} \log \left( \frac{t}{c} \right) \right) dt]
+
+The location parameter [mu] is the location of the distribution,
+while the scale parameter [c] determines the width of the distribution.
+Unlike most other scaled distributions it has the peculiarity that changing
+the scale also shifts the location of the distribution. If the location is
+zero, and the scale 1, then the result is a standard landau
+distribution.
+
+The distribution describes the statistical properties of the energy loss of
+charged particles as they traverse a thin layer of matter.
+
+The following graph shows how the distribution moves as the
+location parameter changes:
+
+[graph landau_pdf1]
+
+While the following graph shows how the shape (scale) parameter alters
+the distribution:
+
+[graph landau_pdf2]
+
+[h4 Member Functions]
+
+ BOOST_MATH_GPU_ENABLED landau_distribution(RealType location = 0, RealType scale = 1);
+
+Constructs a landau distribution, with location parameter /location/
+and scale parameter /scale/. When these parameters take their default
+values (location = 0, scale = 1)
+then the result is a Standard landau Distribution.
+
+Requires scale > 0, otherwise calls __domain_error.
+
+ BOOST_MATH_GPU_ENABLED RealType location()const;
+
+Returns the location parameter of the distribution.
+
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
+
+Returns the scale parameter of the distribution.
+
+ BOOST_MATH_GPU_ENABLED RealType bias()const;
+
+Returns the amount of translation by the scale parameter.
+[expression bias = - 2 / [pi] log(c)]
+
+[h4 Non-member Accessors]
+
+All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
+that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
+
+Note however that the landau distribution does not have a mean,
+standard deviation, etc. See __math_undefined
+[/link math_toolkit.pol_ref.assert_undefined mathematically undefined function]
+to control whether these should fail to compile with a BOOST_STATIC_ASSERTION_FAILURE,
+which is the default.
+
+Alternately, the functions __mean, __sd,
+__variance, __skewness, __kurtosis and __kurtosis_excess will all
+return a __domain_error if called.
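A brief usage sketch (illustrative values, not part of this patch):

    #include <boost/math/distributions/landau.hpp>

    boost::math::landau_distribution<double> l(0.0, 1.0); // standard Landau
    double d = boost::math::pdf(l, 1.5);
    double b = l.bias(); // translation introduced by the scale parameter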
+
+The domain of the random variable is \[-[max_value], +[max_value]\].
+
+[h4 Accuracy]
+
+The error is within 4 epsilon except for the rapidly decaying left tail.
+
+Errors in the PDF at 64-bit double precision:
+
+[$../graphs/landau_pdf_accuracy_64.png]
+
+Errors in the CDF at 64-bit double precision:
+
+[$../graphs/landau_cdf_accuracy_64.png]
+
+Errors in the CDF-complement at 64-bit double precision:
+
+[$../graphs/landau_ccdf_accuracy_64.png]
+
+[h4 Implementation]
+
+See references.
+
+[h4 References]
+
+* [@http://en.wikipedia.org/wiki/landau_distribution landau distribution]
+* T. Yoshimura, Numerical Evaluation and High Precision Approximation Formula for Landau Distribution,
+DOI: 10.36227/techrxiv.171822215.53612870/v2, 2024.
+
+[endsect][/section:landau_dist landau]
+
+[/ landau.qbk
+ Copyright Takuma Yoshimura 2024.
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt).
+]
diff --git a/doc/distributions/laplace.qbk b/doc/distributions/laplace.qbk
index 93327e0228..6115efcb8b 100644
--- a/doc/distributions/laplace.qbk
+++ b/doc/distributions/laplace.qbk
@@ -17,10 +17,10 @@
typedef RealType value_type;
typedef Policy policy_type;
// Construct:
- laplace_distribution(RealType location = 0, RealType scale = 1);
+ BOOST_MATH_GPU_ENABLED laplace_distribution(RealType location = 0, RealType scale = 1);
// Accessors:
- RealType location()const;
- RealType scale()const;
+ BOOST_MATH_GPU_ENABLED RealType location()const;
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
};
}} // namespaces
@@ -49,7 +49,7 @@ Note that the domain of the random variable remains
[h4 Member Functions]
- laplace_distribution(RealType location = 0, RealType scale = 1);
+ BOOST_MATH_GPU_ENABLED laplace_distribution(RealType location = 0, RealType scale = 1);
Constructs a laplace distribution with location /location/ and
scale /scale/.
@@ -61,11 +61,11 @@ The scale parameter is proportional to the standard deviation of the random vari
Requires that the scale parameter is greater than zero, otherwise calls
__domain_error.
- RealType location()const;
+ BOOST_MATH_GPU_ENABLED RealType location()const;
Returns the /location/ parameter of this distribution.
- RealType scale()const;
+ BOOST_MATH_GPU_ENABLED RealType scale()const;
Returns the /scale/ parameter of this distribution.
@@ -73,9 +73,14 @@ Returns the /scale/ parameter of this distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[-[infin],+[infin]\].
+In this distribution the implementations of both `logcdf` and `logpdf` are specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
The laplace distribution is implemented in terms of the
@@ -90,11 +95,19 @@ and its complement /q = 1-p/.
[table
[[Function][Implementation Notes]]
[[pdf][Using the relation: pdf = e[super -abs(x-[mu]) \/ [sigma]] \/ (2 * [sigma]) ]]
+[[logpdf][log(pdf) = -abs(x-[mu])/[sigma] - log([sigma]) - log(2) ]]
[[cdf][Using the relations:

x < [mu] : p = e[super (x-[mu])/[sigma] ] \/ [sigma]

x >= [mu] : p = 1 - e[super ([mu]-x)/[sigma] ] \/ [sigma]
+]]
+[[logcdf][log(cdf) =
+
+x < [mu] : log(p) = ((x - [mu]) / [sigma]) - log(2)
+
+x >= [mu] : log(p) = log1p(-exp(([mu]-x) / [sigma]) / 2)
+
+]]
[[cdf complement][Using the relation:

diff --git a/doc/distributions/logistic.qbk b/doc/distributions/logistic.qbk
index 0a22b48d42..dc42a5d8b3 100644
--- a/doc/distributions/logistic.qbk
+++ b/doc/distributions/logistic.qbk
@@ -15,10 +15,10 @@
      typedef RealType value_type;
      typedef Policy policy_type;
      // Construct:
-     logistic_distribution(RealType location = 0, RealType scale = 1);
+     BOOST_MATH_GPU_ENABLED logistic_distribution(RealType location = 0, RealType scale = 1);
      // Accessors:
-     RealType location()const; // location.
-     RealType scale()const; // scale.
+     BOOST_MATH_GPU_ENABLED RealType location()const; // location.
+     BOOST_MATH_GPU_ENABLED RealType scale()const; // scale.
   };
@@ -39,17 +39,17 @@ parameters change:
[h4 Member Functions]
-   logistic_distribution(RealType u = 0, RealType s = 1);
+   BOOST_MATH_GPU_ENABLED logistic_distribution(RealType u = 0, RealType s = 1);
Constructs a logistic distribution with location /u/ and scale /s/.
Requires `scale > 0`, otherwise a __domain_error is raised.
-   RealType location()const;
+   BOOST_MATH_GPU_ENABLED RealType location()const;
Returns the location of this distribution.
-   RealType scale()const;
+   BOOST_MATH_GPU_ENABLED RealType scale()const;
Returns the scale of this distribution.
@@ -57,6 +57,8 @@ Returns the scale of this distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[-\[max_value\], +\[min_value\]\].
However, the pdf and cdf support inputs of +[infin] and -[infin]
@@ -67,6 +69,9 @@ At `p=1` and `p=0`, the quantile function returns the result of
quantile function returns the result of -__overflow_error and
+__overflow_error respectively.
+In this distribution the implementation of `logcdf` is specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
The logistic distribution is implemented in terms of the `std::exp`
@@ -82,6 +87,7 @@ in such cases, only a low /absolute error/ can be guaranteed.
[[Function][Implementation Notes]]
[[pdf][Using the relation: pdf = e[super -(x-u)/s] / (s*(1+e[super -(x-u)/s])[super 2])]]
[[cdf][Using the relation: p = 1/(1+e[super -(x-u)/s])]]
+[[logcdf][log(cdf) = -log1p(exp((u-x)/s)) ]]
[[cdf complement][Using the relation: q = 1/(1+e[super (x-u)/s])]]
[[quantile][Using the relation: x = u - s*log(1/p-1)]]
[[quantile from the complement][Using the relation: x = u + s*log(p/1-p)]]

diff --git a/doc/distributions/lognormal.qbk b/doc/distributions/lognormal.qbk
index 6e76043570..901b59ed82 100644
--- a/doc/distributions/lognormal.qbk
+++ b/doc/distributions/lognormal.qbk
@@ -17,10 +17,10 @@
      typedef RealType value_type;
      typedef Policy policy_type;
      // Construct:
-     lognormal_distribution(RealType location = 0, RealType scale = 1);
+     BOOST_MATH_GPU_ENABLED lognormal_distribution(RealType location = 0, RealType scale = 1);
      // Accessors:
-     RealType location()const;
-     RealType scale()const;
+     BOOST_MATH_GPU_ENABLED RealType location()const;
+     BOOST_MATH_GPU_ENABLED RealType scale()const;
   };
   }} // namespaces
@@ -51,7 +51,7 @@ The next graph illustrates the effect of the scale parameter on the PDF:
[h4 Member Functions]
-   lognormal_distribution(RealType location = 0, RealType scale = 1);
+   BOOST_MATH_GPU_ENABLED lognormal_distribution(RealType location = 0, RealType scale = 1);
Constructs a lognormal distribution with location /location/ and
scale /scale/.
@@ -65,11 +65,11 @@ logarithm of the random variate.
Requires that the scale parameter is greater than zero, otherwise calls
__domain_error.
-   RealType location()const;
+   BOOST_MATH_GPU_ENABLED RealType location()const;
Returns the /location/ parameter of this distribution.
-   RealType scale()const;
+   BOOST_MATH_GPU_ENABLED RealType scale()const;
Returns the /scale/ parameter of this distribution.
@@ -77,6 +77,8 @@ Returns the /scale/ parameter of this distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0,+[infin]\].

diff --git a/doc/distributions/mapairy.qbk b/doc/distributions/mapairy.qbk
new file mode 100644
index 0000000000..817fb980da
--- /dev/null
+++ b/doc/distributions/mapairy.qbk
@@ -0,0 +1,121 @@
+[section:mapairy_dist Map-Airy Distribution]
+
+``#include <boost/math/distributions/mapairy.hpp>``
+
+   template <class RealType = double, class Policy = policies::policy<> >
+   class mapairy_distribution;
+
+   typedef mapairy_distribution<> mapairy;
+
+   template <class RealType, class Policy>
+   class mapairy_distribution
+   {
+   public:
+      typedef RealType value_type;
+      typedef Policy policy_type;
+
+      BOOST_MATH_GPU_ENABLED mapairy_distribution(RealType location = 0, RealType scale = 1);
+
+      BOOST_MATH_GPU_ENABLED RealType location()const;
+      BOOST_MATH_GPU_ENABLED RealType scale()const;
+   };
+
+It is a special case of a [@http://en.wikipedia.org/wiki/Stable_distribution stable distribution]
+with shape parameters [alpha]=3/2, [beta]=1.
+
+The distribution is sometimes defined with [beta] = -1 instead, which is the
+mirror image of this one about its location.
+
+Its [@http://en.wikipedia.org/wiki/Probability_distribution probability density function (PDF)]
+is given by:
+
+[equation mapairy_ref1] [/f(x; \mu=0, c=1/\sqrt[3]{18}) = 2 \exp \left( \frac{2}{3} x^3 \right) \left( -x \mathrm{Ai}(x^2) - \mathrm{Ai}'(x^2) \right)]
+
+The location parameter [mu] is the location of the distribution,
+while the scale parameter [c] determines the width of the distribution.
If the location is
+zero, and the scale 1, then the result is a standard Map-Airy
+distribution.
+
+The distribution describes the area under a Brownian excursion over a unit interval.
+
+The following graph shows how the distribution moves as the
+location parameter changes:
+
+[graph mapairy_pdf1]
+
+While the following graph shows how the shape (scale) parameter alters
+the distribution:
+
+[graph mapairy_pdf2]
+
+[h4 Member Functions]
+
+   BOOST_MATH_GPU_ENABLED mapairy_distribution(RealType location = 0, RealType scale = 1);
+
+Constructs a Map-Airy distribution, with location parameter /location/
+and scale parameter /scale/. When these parameters take their default
+values (location = 0, scale = 1)
+then the result is a standard Map-Airy distribution.
+
+Requires scale > 0, otherwise calls __domain_error.
+
+   BOOST_MATH_GPU_ENABLED RealType location()const;
+
+Returns the location parameter of the distribution.
+
+   BOOST_MATH_GPU_ENABLED RealType scale()const;
+
+Returns the scale parameter of the distribution.
+
+[h4 Non-member Accessors]
+
+All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
+that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
+
+Note however that the Map-Airy distribution does not have a skewness,
+kurtosis, etc. See __math_undefined
+[/link math_toolkit.pol_ref.assert_undefined mathematically undefined function]
+to control whether these should fail to compile with a BOOST_STATIC_ASSERTION_FAILURE,
+which is the default.
+
+Alternatively, the functions __skewness, __kurtosis and __kurtosis_excess will all
+return a __domain_error if called.
+
+The domain of the random variable is \[-[max_value], +[max_value]\].
+
+[h4 Accuracy]
+
+The error is within 4 epsilon except for the rapidly decaying left tail.
+
+Errors in the PDF at 64-bit double precision:
+
+[$../graphs/mapairy_pdf_accuracy_64.png]
+
+Errors in the CDF at 64-bit double precision:
+
+[$../graphs/mapairy_cdf_accuracy_64.png]
+
+Errors in the CDF-complement at 64-bit double precision:
+
+[$../graphs/mapairy_ccdf_accuracy_64.png]
+
+[h4 Implementation]
+
+See references.
+
+[h4 References]
+
+* [@https://mathworld.wolfram.com/Map-AiryDistribution.html Wolfram MathWorld: Map-Airy Distribution]
+* T. Yoshimura, Numerical Evaluation and High Precision Approximation Formula for Map-Airy Distribution,
+DOI: 10.36227/techrxiv.172053942.27675733/v1, 2024.
+
+[endsect][/section:mapairy_dist mapairy]
+
+[/ mapairy.qbk
+  Copyright Takuma Yoshimura 2024.
+  Distributed under the Boost Software License, Version 1.0.
+  (See accompanying file LICENSE_1_0.txt or copy at
+  http://www.boost.org/LICENSE_1_0.txt).
+] diff --git a/doc/distributions/nc_beta.qbk b/doc/distributions/nc_beta.qbk index 8e2c816559..478b545020 100644 --- a/doc/distributions/nc_beta.qbk +++ b/doc/distributions/nc_beta.qbk @@ -18,14 +18,14 @@ typedef Policy policy_type; // Constructor: - non_central_beta_distribution(RealType alpha, RealType beta, RealType lambda); + BOOST_MATH_GPU_ENABLED non_central_beta_distribution(RealType alpha, RealType beta, RealType lambda); // Accessor to shape parameters: - RealType alpha()const; - RealType beta()const; + BOOST_MATH_GPU_ENABLED RealType alpha()const; + BOOST_MATH_GPU_ENABLED RealType beta()const; // Accessor to non-centrality parameter lambda: - RealType non_centrality()const; + BOOST_MATH_GPU_ENABLED RealType non_centrality()const; }; }} // namespaces @@ -59,22 +59,22 @@ for different values of [lambda]: [h4 Member Functions] - non_central_beta_distribution(RealType a, RealType b, RealType lambda); + BOOST_MATH_GPU_ENABLED non_central_beta_distribution(RealType a, RealType b, RealType lambda); Constructs a noncentral beta distribution with shape parameters /a/ and /b/ and non-centrality parameter /lambda/. Requires a > 0, b > 0 and lambda >= 0, otherwise calls __domain_error. - RealType alpha()const; + BOOST_MATH_GPU_ENABLED RealType alpha()const; Returns the parameter /a/ from which this object was constructed. - RealType beta()const; + BOOST_MATH_GPU_ENABLED RealType beta()const; Returns the parameter /b/ from which this object was constructed. - RealType non_centrality()const; + BOOST_MATH_GPU_ENABLED RealType non_centrality()const; Returns the parameter /lambda/ from which this object was constructed. @@ -83,6 +83,8 @@ Returns the parameter /lambda/ from which this object was constructed. Most of the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] are supported: __cdf, __pdf, __quantile, __mean, __variance, __sd, __median, __mode, __hazard, __chf, __range and __support. +For this distribution these functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. Mean and variance are implemented using hypergeometric pfq functions and relations given in [@http://reference.wolfram.com/mathematica/ref/NoncentralBetaDistribution.html Wolfram Noncentral Beta Distribution]. 
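+
+For instance, a minimal sketch of the accessors and non-member functions
+described above (the parameter values 2, 3 and 1.5 are arbitrary examples,
+and the header path assumes the usual `boost/math/distributions/` layout):
+
+   #include <boost/math/distributions/non_central_beta.hpp>
+   #include <iostream>
+
+   int main()
+   {
+      boost::math::non_central_beta_distribution<double> dist(2.0, 3.0, 1.5); // a, b, lambda
+      std::cout << dist.alpha() << ", " << dist.beta() << ", "
+                << dist.non_centrality() << "\n";
+      std::cout << cdf(dist, 0.5) << "\n";   // CDF at x = 0.5
+      std::cout << mean(dist) << "\n";       // computed via the hypergeometric pFq relations
+   }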
diff --git a/doc/distributions/nc_chi_squared.qbk b/doc/distributions/nc_chi_squared.qbk
index 72235db6a3..9ab0f6f8d2 100644
--- a/doc/distributions/nc_chi_squared.qbk
+++ b/doc/distributions/nc_chi_squared.qbk
@@ -18,22 +18,22 @@
      typedef Policy policy_type;
      // Constructor:
-     non_central_chi_squared_distribution(RealType v, RealType lambda);
+     BOOST_MATH_GPU_ENABLED non_central_chi_squared_distribution(RealType v, RealType lambda);
      // Accessor to degrees of freedom parameter v:
-     RealType degrees_of_freedom()const;
+     BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const;
      // Accessor to non centrality parameter lambda:
-     RealType non_centrality()const;
+     BOOST_MATH_GPU_ENABLED RealType non_centrality()const;
      // Parameter finders:
-     static RealType find_degrees_of_freedom(RealType lambda, RealType x, RealType p);
+     BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(RealType lambda, RealType x, RealType p);
      template <class A, class B, class C>
-     static RealType find_degrees_of_freedom(const complemented3_type<A,B,C>& c);
+     BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(const complemented3_type<A,B,C>& c);
-     static RealType find_non_centrality(RealType v, RealType x, RealType p);
+     BOOST_MATH_GPU_ENABLED static RealType find_non_centrality(RealType v, RealType x, RealType p);
      template <class A, class B, class C>
-     static RealType find_non_centrality(const complemented3_type<A,B,C>& c);
+     BOOST_MATH_GPU_ENABLED static RealType find_non_centrality(const complemented3_type<A,B,C>& c);
   };
   }} // namespaces
@@ -70,43 +70,42 @@ for different values of [lambda]:
[h4 Member Functions]
-      non_central_chi_squared_distribution(RealType v, RealType lambda);
+      BOOST_MATH_GPU_ENABLED non_central_chi_squared_distribution(RealType v, RealType lambda);
Constructs a Chi-Squared distribution with [nu] degrees of freedom
and non-centrality parameter /lambda/.
Requires [nu] > 0 and lambda >= 0, otherwise calls __domain_error.
-      RealType degrees_of_freedom()const;
+      BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const;
Returns the parameter [nu] from which this object was constructed.
-      RealType non_centrality()const;
+      BOOST_MATH_GPU_ENABLED RealType non_centrality()const;
Returns the parameter /lambda/ from which this object was constructed.
-
-      static RealType find_degrees_of_freedom(RealType lambda, RealType x, RealType p);
+      BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(RealType lambda, RealType x, RealType p);
This function returns the number of degrees of freedom [nu] such that:
`cdf(non_central_chi_squared(v, lambda), x) == p`
-      template <class A, class B, class C>
-      static RealType find_degrees_of_freedom(const complemented3_type<A,B,C>& c);
+      template <class A, class B, class C>
+      BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(const complemented3_type<A,B,C>& c);
When called with argument `boost::math::complement(lambda, x, q)`
this function returns the number of degrees of freedom [nu] such that:
`cdf(complement(non_central_chi_squared(v, lambda), x)) == q`.
-      static RealType find_non_centrality(RealType v, RealType x, RealType p);
+      BOOST_MATH_GPU_ENABLED static RealType find_non_centrality(RealType v, RealType x, RealType p);
This function returns the non centrality parameter /lambda/ such that:
`cdf(non_central_chi_squared(v, lambda), x) == p`
-      template <class A, class B, class C>
-      static RealType find_non_centrality(const complemented3_type<A,B,C>& c);
+      template <class A, class B, class C>
+      BOOST_MATH_GPU_ENABLED static RealType find_non_centrality(const complemented3_type<A,B,C>& c);
When called with argument `boost::math::complement(v, x, q)`
this function returns the non centrality parameter /lambda/ such that:
@@ -117,6 +116,8 @@ this function returns the non centrality parameter /lambda/ such that:
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0, +[infin]\].

diff --git a/doc/distributions/nc_f.qbk b/doc/distributions/nc_f.qbk
index 6436c34336..d31c8116bc 100644
--- a/doc/distributions/nc_f.qbk
+++ b/doc/distributions/nc_f.qbk
@@ -18,14 +18,14 @@
      typedef Policy policy_type;
      // Constructor:
-     non_central_f_distribution(RealType v1, RealType v2, RealType lambda);
+     BOOST_MATH_GPU_ENABLED non_central_f_distribution(RealType v1, RealType v2, RealType lambda);
      // Accessor to degrees_of_freedom parameters v1 & v2:
-     RealType degrees_of_freedom1()const;
-     RealType degrees_of_freedom2()const;
+     BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const;
+     BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const;
      // Accessor to non-centrality parameter lambda:
-     RealType non_centrality()const;
+     BOOST_MATH_GPU_ENABLED RealType non_centrality()const;
   };
   }} // namespaces
@@ -55,22 +55,22 @@ for different values of [lambda]:
[h4 Member Functions]
-      non_central_f_distribution(RealType v1, RealType v2, RealType lambda);
+      BOOST_MATH_GPU_ENABLED non_central_f_distribution(RealType v1, RealType v2, RealType lambda);
Constructs a non-central F distribution with parameters /v1/ and /v2/
and non-centrality parameter /lambda/.
Requires /v1/ > 0, /v2/ > 0 and lambda >= 0, otherwise calls __domain_error.
-      RealType degrees_of_freedom1()const;
+      BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const;
Returns the parameter /v1/ from which this object was constructed.
-      RealType degrees_of_freedom2()const;
+      BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const;
Returns the parameter /v2/ from which this object was constructed.
-      RealType non_centrality()const;
+      BOOST_MATH_GPU_ENABLED RealType non_centrality()const;
Returns the non-centrality parameter /lambda/ from which this object was constructed.
@@ -78,6 +78,8 @@ Returns the non-centrality parameter /lambda/ from which this object was constru
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0, +[infin]\].
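+
+To make the non-central chi-squared parameter finders documented above
+concrete, here is a minimal sketch (the values 4, 10 and 0.05 are arbitrary
+examples):
+
+   #include <boost/math/distributions/non_central_chi_squared.hpp>
+   #include <iostream>
+
+   int main()
+   {
+      using boost::math::non_central_chi_squared_distribution;
+      // Find lambda such that cdf(non_central_chi_squared(4, lambda), 10) == 0.05:
+      double lambda = non_central_chi_squared_distribution<double>::find_non_centrality(4.0, 10.0, 0.05);
+      std::cout << lambda << "\n";
+      // The complemented form: cdf(complement(non_central_chi_squared(4, lambda2), 10)) == 0.95:
+      double lambda2 = non_central_chi_squared_distribution<double>::find_non_centrality(
+         boost::math::complement(4.0, 10.0, 0.95));
+      std::cout << lambda2 << "\n";
+   }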
diff --git a/doc/distributions/negative_binomial.qbk b/doc/distributions/negative_binomial.qbk index ee61beef3d..5a23ce23aa 100644 --- a/doc/distributions/negative_binomial.qbk +++ b/doc/distributions/negative_binomial.qbk @@ -17,28 +17,28 @@ typedef RealType value_type; typedef Policy policy_type; // Constructor from successes and success_fraction: - negative_binomial_distribution(RealType r, RealType p); + BOOST_MATH_GPU_ENABLED negative_binomial_distribution(RealType r, RealType p); // Parameter accessors: - RealType success_fraction() const; - RealType successes() const; + BOOST_MATH_GPU_ENABLED RealType success_fraction() const; + BOOST_MATH_GPU_ENABLED RealType successes() const; // Bounds on success fraction: - static RealType find_lower_bound_on_p( + BOOST_MATH_GPU_ENABLED static RealType find_lower_bound_on_p( RealType trials, RealType successes, RealType probability); // alpha - static RealType find_upper_bound_on_p( + BOOST_MATH_GPU_ENABLED static RealType find_upper_bound_on_p( RealType trials, RealType successes, RealType probability); // alpha // Estimate min/max number of trials: - static RealType find_minimum_number_of_trials( + BOOST_MATH_GPU_ENABLED static RealType find_minimum_number_of_trials( RealType k, // Number of failures. RealType p, // Success fraction. RealType probability); // Probability threshold alpha. - static RealType find_maximum_number_of_trials( + BOOST_MATH_GPU_ENABLED static RealType find_maximum_number_of_trials( RealType k, // Number of failures. RealType p, // Success fraction. RealType probability); // Probability threshold alpha. @@ -112,7 +112,7 @@ poisson([lambda]) = lim [sub r [rarr] [infin]] negative_binomial(r, r / ([lambda [h5 Construct] - negative_binomial_distribution(RealType r, RealType p); + BOOST_MATH_GPU_ENABLED negative_binomial_distribution(RealType r, RealType p); Constructor: /r/ is the total number of successes, /p/ is the probability of success of a single trial. @@ -121,11 +121,11 @@ Requires: `r > 0` and `0 <= p <= 1`. [h5 Accessors] - RealType success_fraction() const; // successes / trials (0 <= p <= 1) + BOOST_MATH_GPU_ENABLED RealType success_fraction() const; // successes / trials (0 <= p <= 1) Returns the parameter /p/ from which this distribution was constructed. - RealType successes() const; // required successes (r > 0) + BOOST_MATH_GPU_ENABLED RealType successes() const; // required successes (r > 0) Returns the parameter /r/ from which this distribution was constructed. @@ -134,7 +134,7 @@ see __binomial_distrib for more discussion. [h5 Lower Bound on Parameter p] - static RealType find_lower_bound_on_p( + BOOST_MATH_GPU_ENABLED static RealType find_lower_bound_on_p( RealType failures, RealType successes, RealType probability) // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence. @@ -170,7 +170,7 @@ Computational statistics and data analysis, 2005, vol. 48, no3, 605-621]. [h5 Upper Bound on Parameter p] - static RealType find_upper_bound_on_p( + BOOST_MATH_GPU_ENABLED static RealType find_upper_bound_on_p( RealType trials, RealType successes, RealType alpha); // (0 <= alpha <= 1), 0.05 equivalent to 95% confidence. @@ -206,7 +206,7 @@ Computational statistics and data analysis, 2005, vol. 48, no3, 605-621]. [h5 Estimating Number of Trials to Ensure at Least a Certain Number of Failures] - static RealType find_minimum_number_of_trials( + BOOST_MATH_GPU_ENABLED static RealType find_minimum_number_of_trials( RealType k, // number of failures. RealType p, // success fraction. 
RealType alpha); // probability threshold (0.05 equivalent to 95%).
@@ -236,7 +236,7 @@ of observing k failures or fewer.
[h5 Estimating Number of Trials to Ensure a Maximum Number of Failures or Less]
-   static RealType find_maximum_number_of_trials(
+   BOOST_MATH_GPU_ENABLED static RealType find_maximum_number_of_trials(
      RealType k,     // number of failures.
      RealType p,     // success fraction.
      RealType alpha); // probability threshold (0.05 equivalent to 95%).
@@ -266,6 +266,8 @@ of observing more than k failures.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
However it's worth taking a moment to define what these actually mean in
the context of this distribution:

diff --git a/doc/distributions/non_members.qbk b/doc/distributions/non_members.qbk
index 9c900bf127..99728b8700 100644
--- a/doc/distributions/non_members.qbk
+++ b/doc/distributions/non_members.qbk
@@ -17,6 +17,8 @@ to go straight to the function you want if you already know its name.
* __hazard.
* __kurtosis.
* __kurtosis_excess
+* __logcdf.
+* __logpdf.
* __mean.
* __median.
* __mode.
@@ -349,6 +351,20 @@ Kurtosis excess can have a value from -2 to + infinity.
The kurtosis excess of a normal distribution is zero.
+[h4:logcdf Natural Log of the Cumulative Distribution Function]
+
+   template <class RealType, class Policy>
+   RealType logcdf(const ``['Distribution-Type]``<RealType, Policy>& dist, const RealType& x);
+
+Returns the natural log of the CDF of distribution /dist/ evaluated at /x/.
+
+[h4:logpdf Natural Log of the Probability Density Function]
+
+   template <class RealType, class Policy>
+   RealType logpdf(const ``['Distribution-Type]``<RealType, Policy>& dist, const RealType& x);
+
+Returns the natural log of the PDF of distribution /dist/ evaluated at /x/.
+
[h4:cdfPQ P and Q]
The terms P and Q are sometimes used to refer to the __cdf

diff --git a/doc/distributions/normal.qbk b/doc/distributions/normal.qbk
index 52ac44e96b..8e0f0c8fba 100644
--- a/doc/distributions/normal.qbk
+++ b/doc/distributions/normal.qbk
@@ -17,13 +17,13 @@
      typedef RealType value_type;
      typedef Policy policy_type;
      // Construct:
-     normal_distribution(RealType mean = 0, RealType sd = 1);
+     BOOST_MATH_GPU_ENABLED normal_distribution(RealType mean = 0, RealType sd = 1);
      // Accessors:
-     RealType mean()const; // location.
-     RealType standard_deviation()const; // scale.
+     BOOST_MATH_GPU_ENABLED RealType mean()const; // location.
+     BOOST_MATH_GPU_ENABLED RealType standard_deviation()const; // scale.
      // Synonyms, provided to allow generic use of find_location and find_scale.
-     RealType location()const;
-     RealType scale()const;
+     BOOST_MATH_GPU_ENABLED RealType location()const;
+     BOOST_MATH_GPU_ENABLED RealType scale()const;
   };
   }} // namespaces
@@ -53,20 +53,20 @@ and illustrated by this graph
[h4 Member Functions]
-   normal_distribution(RealType mean = 0, RealType sd = 1);
+   BOOST_MATH_GPU_ENABLED normal_distribution(RealType mean = 0, RealType sd = 1);
Constructs a normal distribution with mean /mean/ and
standard deviation /sd/.
Requires /sd/ > 0, otherwise __domain_error is called.
-   RealType mean()const;
-   RealType location()const;
+   BOOST_MATH_GPU_ENABLED RealType mean()const;
+   BOOST_MATH_GPU_ENABLED RealType location()const;
both return the /mean/ of this distribution.
- RealType standard_deviation()const; - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType standard_deviation()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; both return the /standard deviation/ of this distribution. (Redundant location and scale function are provided to match other similar distributions, @@ -76,6 +76,8 @@ allowing the functions find_location and find_scale to be used generically). All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variable is \[-[max_value], +[min_value]\]. However, the pdf of +[infin] and -[infin] = 0 is also supported, @@ -97,6 +99,7 @@ and /s/ is its standard deviation. [table [[Function][Implementation Notes]] [[pdf][Using the relation: pdf = e[super -(x-m)[super 2]\/(2s[super 2])] \/ (s * sqrt(2*pi)) ]] +[[logpdf][log(pdf) = -log(s) - log(2*[pi])/2 - (x-mean)[super 2]/(2*s[super 2]) ]] [[cdf][Using the relation: p = 0.5 * __erfc(-(x-m)/(s*sqrt(2))) ]] [[cdf complement][Using the relation: q = 0.5 * __erfc((x-m)/(s*sqrt(2))) ]] [[quantile][Using the relation: x = m - s * sqrt(2) * __erfc_inv(2*p)]] diff --git a/doc/distributions/pareto.qbk b/doc/distributions/pareto.qbk index fcc7eee425..0161282ec9 100644 --- a/doc/distributions/pareto.qbk +++ b/doc/distributions/pareto.qbk @@ -17,10 +17,10 @@ public: typedef RealType value_type; // Constructor: - pareto_distribution(RealType scale = 1, RealType shape = 1) + BOOST_MATH_GPU_ENABLED pareto_distribution(RealType scale = 1, RealType shape = 1) // Accessors: - RealType scale()const; - RealType shape()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType shape()const; }; }} // namespaces @@ -50,7 +50,7 @@ And this graph illustrates how the PDF varies with the shape parameter [alpha]: [h4 Member Functions] - pareto_distribution(RealType scale = 1, RealType shape = 1); + BOOST_MATH_GPU_ENABLED pareto_distribution(RealType scale = 1, RealType shape = 1); Constructs a [@http://en.wikipedia.org/wiki/pareto_distribution pareto distribution] with shape /shape/ and scale /scale/. @@ -58,11 +58,11 @@ pareto distribution] with shape /shape/ and scale /scale/. Requires that the /shape/ and /scale/ parameters are both greater than zero, otherwise calls __domain_error. - RealType scale()const; + BOOST_MATH_GPU_ENABLED RealType scale()const; Returns the /scale/ parameter of this distribution. - RealType shape()const; + BOOST_MATH_GPU_ENABLED RealType shape()const; Returns the /shape/ parameter of this distribution. @@ -70,9 +70,14 @@ Returns the /shape/ parameter of this distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The supported domain of the random variable is \[scale, [infin]\]. +In this distribution the implementation of `logcdf` is specialized +to improve numerical accuracy. + [h4 Accuracy] The Pareto distribution is implemented in terms of the @@ -91,6 +96,7 @@ and its complement /q = 1-p/. 
[[Function][Implementation Notes]] [[pdf][Using the relation: pdf p = [alpha][beta][super [alpha]]/x[super [alpha] +1] ]] [[cdf][Using the relation: cdf p = 1 - ([beta] / x)[super [alpha]] ]] +[[logcdf][log(cdf) = log1p(-pow([beta]/x, [alpha])) ]] [[cdf complement][Using the relation: q = 1 - p = -([beta] / x)[super [alpha]] ]] [[quantile][Using the relation: x = [beta] / (1 - p)[super 1/[alpha]] ]] [[quantile from the complement][Using the relation: x = [beta] / (q)[super 1/[alpha]] ]] diff --git a/doc/distributions/poisson.qbk b/doc/distributions/poisson.qbk index 50a3b12cea..533a6d6bad 100644 --- a/doc/distributions/poisson.qbk +++ b/doc/distributions/poisson.qbk @@ -17,8 +17,8 @@ typedef RealType value_type; typedef Policy policy_type; - poisson_distribution(RealType mean = 1); // Constructor. - RealType mean()const; // Accessor. + BOOST_MATH_GPU_ENABLED poisson_distribution(RealType mean = 1); // Constructor. + BOOST_MATH_GPU_ENABLED RealType mean()const; // Accessor. } }} // namespaces boost::math @@ -47,11 +47,11 @@ The following graph illustrates how the PDF varies with the parameter [lambda]: [h4 Member Functions] - poisson_distribution(RealType mean = 1); + BOOST_MATH_GPU_ENABLED poisson_distribution(RealType mean = 1); Constructs a poisson distribution with mean /mean/. - RealType mean()const; + BOOST_MATH_GPU_ENABLED RealType mean()const; Returns the /mean/ of this distribution. @@ -59,9 +59,14 @@ Returns the /mean/ of this distribution. All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] that are generic to all distributions are supported: __usual_accessors. +For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can +be run on both host and device. The domain of the random variable is \[0, [infin]\]. +In this distribution the implementation of `logpdf` is specialized +to improve numerical accuracy. + [h4 Accuracy] The Poisson distribution is implemented in terms of the @@ -81,6 +86,7 @@ In the following table [lambda] is the mean of the distribution, [table [[Function][Implementation Notes]] [[pdf][Using the relation: pdf = e[super -[lambda]] [lambda][super k] \/ k! ]] +[[logpdf][log(pdf) = -lgamma(k+1) + k*log([lambda]) - [lambda] if k > 0 and [lambda] > 0 ]] [[cdf][Using the relation: p = [Gamma](k+1, [lambda]) \/ k! = __gamma_q(k+1, [lambda])]] [[cdf complement][Using the relation: q = __gamma_p(k+1, [lambda]) ]] [[quantile][Using the relation: k = __gamma_q_inva([lambda], p) - 1]] diff --git a/doc/distributions/rayleigh.qbk b/doc/distributions/rayleigh.qbk index 5fd6fe44c2..a28d5f577b 100644 --- a/doc/distributions/rayleigh.qbk +++ b/doc/distributions/rayleigh.qbk @@ -18,9 +18,9 @@ typedef RealType value_type; typedef Policy policy_type; // Construct: - rayleigh_distribution(RealType sigma = 1) + BOOST_MATH_GPU_ENABLED rayleigh_distribution(RealType sigma = 1) // Accessors: - RealType sigma()const; + BOOST_MATH_GPU_ENABLED RealType sigma()const; }; }} // namespaces @@ -58,7 +58,7 @@ and [@http://en.wikipedia.org/wiki/Weibull_distribution Weibull] distributions a [h4 Member Functions] - rayleigh_distribution(RealType sigma = 1); + BOOST_MATH_GPU_ENABLED rayleigh_distribution(RealType sigma = 1); Constructs a [@http://en.wikipedia.org/wiki/Rayleigh_distribution Rayleigh distribution] with [sigma] /sigma/. @@ -66,7 +66,7 @@ Rayleigh distribution] with [sigma] /sigma/. Requires that the [sigma] parameter is greater than zero, otherwise calls __domain_error. 
-   RealType sigma()const;
+   BOOST_MATH_GPU_ENABLED RealType sigma()const;
Returns the /sigma/ parameter of this distribution.
@@ -74,9 +74,14 @@ Returns the /sigma/ parameter of this distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0, max_value\].
+In this distribution the implementations of both `logcdf` and `logpdf` are specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
The Rayleigh distribution is implemented in terms of the
@@ -92,7 +97,9 @@ In the following table [sigma] is the sigma parameter of the distribution,
[table
[[Function][Implementation Notes]]
[[pdf][Using the relation: pdf = x * exp(-x[super 2])/2 [sigma][super 2] ]]
+[[logpdf][log(pdf) = -(x[super 2])/(2*[sigma][super 2]) - 2*log([sigma]) + log(x) ]]
[[cdf][Using the relation: p = 1 - exp(-x[super 2]/2) [sigma][super 2]= -__expm1(-x[super 2]/2) [sigma][super 2]]]
+[[logcdf][log(cdf) = log1p(-exp(-x[super 2] / (2*[sigma][super 2]))) ]]
[[cdf complement][Using the relation: q = exp(-x[super 2]/ 2) * [sigma][super 2] ]]
[[quantile][Using the relation: x = sqrt(-2 * [sigma] [super 2]) * log(1 - p)) = sqrt(-2 * [sigma] [super 2]) * __log1p(-p))]]
[[quantile from the complement][Using the relation: x = sqrt(-2 * [sigma] [super 2]) * log(q)) ]]

diff --git a/doc/distributions/saspoint5.qbk b/doc/distributions/saspoint5.qbk
new file mode 100644
index 0000000000..06efbd32e5
--- /dev/null
+++ b/doc/distributions/saspoint5.qbk
@@ -0,0 +1,116 @@
+[section:saspoint5_dist S[alpha]S Point5 Distribution]
+
+``#include <boost/math/distributions/saspoint5.hpp>``
+
+   template <class RealType = double, class Policy = policies::policy<> >
+   class saspoint5_distribution;
+
+   typedef saspoint5_distribution<> saspoint5;
+
+   template <class RealType, class Policy>
+   class saspoint5_distribution
+   {
+   public:
+      typedef RealType value_type;
+      typedef Policy policy_type;
+
+      BOOST_MATH_GPU_ENABLED saspoint5_distribution(RealType location = 0, RealType scale = 1);
+
+      BOOST_MATH_GPU_ENABLED RealType location()const;
+      BOOST_MATH_GPU_ENABLED RealType scale()const;
+   };
+
+It is a special case of a [@http://en.wikipedia.org/wiki/Stable_distribution stable distribution]
+with shape parameters [alpha]=1/2, [beta]=0.
+
+Its [@http://en.wikipedia.org/wiki/Probability_distribution probability density function (PDF)]
+is given by:
+
+[equation saspoint5_ref1] [/f(x; \mu, c)=\frac{1}{2 \pi} \int_{-\infty}^{\infty} \exp( i t \mu - \sqrt{|c t|} ) e^{-i x t} dt]
+
+The location parameter [mu] is the location of the distribution,
+while the scale parameter [c] determines the width of the distribution.
+If the location is
+zero, and the scale 1, then the result is a standard S[alpha]S Point5
+distribution.
+
+This distribution has heavier tails than the Cauchy distribution.
+
+The following graph shows how the distribution moves as the
+location parameter changes:
+
+[graph saspoint5_pdf1]
+
+While the following graph shows how the shape (scale) parameter alters
+the distribution:
+
+[graph saspoint5_pdf2]
+
+[h4 Member Functions]
+
+   BOOST_MATH_GPU_ENABLED saspoint5_distribution(RealType location = 0, RealType scale = 1);
+
+Constructs an S[alpha]S Point5 distribution, with location parameter /location/
+and scale parameter /scale/. When these parameters take their default
+values (location = 0, scale = 1)
+then the result is a standard S[alpha]S Point5 distribution.
+
+Requires scale > 0, otherwise calls __domain_error.
+
+   BOOST_MATH_GPU_ENABLED RealType location()const;
+
+Returns the location parameter of the distribution.
+
+   BOOST_MATH_GPU_ENABLED RealType scale()const;
+
+Returns the scale parameter of the distribution.
+
+[h4 Non-member Accessors]
+
+All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
+that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
+
+Note however that the S[alpha]S Point5 distribution does not have a mean,
+standard deviation, etc. See __math_undefined
+[/link math_toolkit.pol_ref.assert_undefined mathematically undefined function]
+to control whether these should fail to compile with a BOOST_STATIC_ASSERTION_FAILURE,
+which is the default.
+
+Alternatively, the functions __mean, __sd,
+__variance, __skewness, __kurtosis and __kurtosis_excess will all
+return a __domain_error if called.
+
+The domain of the random variable is \[-[max_value], +[max_value]\].
+
+[h4 Accuracy]
+
+The error is within 4 epsilon.
+
+Errors in the PDF at 64-bit double precision:
+
+[$../graphs/saspoint5_pdf_accuracy_64.png]
+
+Errors in the CDF-complement at 64-bit double precision:
+
+[$../graphs/saspoint5_ccdf_accuracy_64.png]
+
+[h4 Implementation]
+
+See references.
+
+[h4 References]
+
+* T. Yoshimura, Numerical Evaluation and High Precision Approximation Formula for S[alpha]S Point5 Distribution,
+DOI: 10.36227/techrxiv.172055253.37208198/v1, 2024.
+
+[endsect][/section:saspoint5_dist saspoint5]
+
+[/ saspoint5.qbk
+  Copyright Takuma Yoshimura 2024.
+  Distributed under the Boost Software License, Version 1.0.
+  (See accompanying file LICENSE_1_0.txt or copy at
+  http://www.boost.org/LICENSE_1_0.txt).
+]

diff --git a/doc/distributions/students_t.qbk b/doc/distributions/students_t.qbk
index 9701ce9fca..3396048c5f 100644
--- a/doc/distributions/students_t.qbk
+++ b/doc/distributions/students_t.qbk
@@ -17,13 +17,13 @@
      typedef Policy policy_type;
      // Constructor:
-     students_t_distribution(const RealType& v);
+     BOOST_MATH_GPU_ENABLED students_t_distribution(const RealType& v);
      // Accessor:
-     RealType degrees_of_freedom()const;
+     BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const;
      // degrees of freedom estimation:
-     static RealType find_degrees_of_freedom(
+     BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(
        RealType difference_from_mean,
        RealType alpha,
        RealType beta,
@@ -62,7 +62,7 @@ illustrates how the PDF varies with the degrees of freedom [nu]:
[h4 Member Functions]
-   students_t_distribution(const RealType& v);
+   BOOST_MATH_GPU_ENABLED students_t_distribution(const RealType& v);
Constructs a Student's t-distribution with /v/ degrees of freedom.
@@ -71,11 +71,11 @@ otherwise calls __domain_error.
Note that non-integral degrees of freedom are supported,
and are meaningful under certain circumstances.
-   RealType degrees_of_freedom()const;
+   BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const;
returns the number of degrees of freedom of this distribution.
-   static RealType find_degrees_of_freedom(
+   BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(
      RealType difference_from_mean,
      RealType alpha,
     RealType beta,
@@ -110,6 +110,8 @@ NIST Engineering Statistics Handbook].
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[-[infin], +[infin]\].

diff --git a/doc/distributions/triangular.qbk b/doc/distributions/triangular.qbk
index cd48f3310a..f32514f652 100644
--- a/doc/distributions/triangular.qbk
+++ b/doc/distributions/triangular.qbk
@@ -17,12 +17,12 @@
      typedef RealType value_type;
      typedef Policy policy_type;
-     triangular_distribution(RealType lower = -1, RealType mode = 0, RealType upper = 1); // Constructor.
+     BOOST_MATH_GPU_ENABLED triangular_distribution(RealType lower = -1, RealType mode = 0, RealType upper = 1); // Constructor.
        : m_lower(lower), m_mode(mode), m_upper(upper) // Default is -1, 0, +1 symmetric triangular distribution.
      // Accessor functions.
-     RealType lower()const;
-     RealType mode()const;
-     RealType upper()const;
+     BOOST_MATH_GPU_ENABLED RealType lower()const;
+     BOOST_MATH_GPU_ENABLED RealType mode()const;
+     BOOST_MATH_GPU_ENABLED RealType upper()const;
   }; // class triangular_distribution
   }} // namespaces
@@ -77,7 +77,7 @@ and cumulative distribution function
[h4 Member Functions]
-   triangular_distribution(RealType lower = 0, RealType mode = 0 RealType upper = 1);
+   BOOST_MATH_GPU_ENABLED triangular_distribution(RealType lower = -1, RealType mode = 0, RealType upper = 1);
Constructs a [@http://en.wikipedia.org/wiki/triangular_distribution triangular distribution]
with lower /lower/ (a) and upper /upper/ (b).
@@ -99,15 +99,15 @@ So, for example, to compute a variance using __WolframAlpha, use
The parameters of a distribution can be obtained using these member functions:
-   RealType lower()const;
+   BOOST_MATH_GPU_ENABLED RealType lower()const;
Returns the ['lower] parameter of this distribution (default -1).
-   RealType mode()const;
+   BOOST_MATH_GPU_ENABLED RealType mode()const;
Returns the ['mode] parameter of this distribution (default 0).
-   RealType upper()const;
+   BOOST_MATH_GPU_ENABLED RealType upper()const;
Returns the ['upper] parameter of this distribution (default +1).
@@ -115,6 +115,8 @@ Returns the ['upper] parameter of this distribution (default+1).
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \lower\ to \upper\,
and the supported range is lower <= x <= upper.

diff --git a/doc/distributions/uniform.qbk b/doc/distributions/uniform.qbk
index 58c11735d6..56c965877e 100644
--- a/doc/distributions/uniform.qbk
+++ b/doc/distributions/uniform.qbk
@@ -16,11 +16,11 @@
   public:
      typedef RealType value_type;
-     uniform_distribution(RealType lower = 0, RealType upper = 1); // Constructor.
+     BOOST_MATH_GPU_ENABLED uniform_distribution(RealType lower = 0, RealType upper = 1); // Constructor.
        : m_lower(lower), m_upper(upper) // Default is standard uniform distribution.
      // Accessor functions.
-     RealType lower()const;
-     RealType upper()const;
+     BOOST_MATH_GPU_ENABLED RealType lower()const;
+     BOOST_MATH_GPU_ENABLED RealType upper()const;
   }; // class uniform_distribution
   }} // namespaces
@@ -66,7 +66,7 @@ Likewise for the CDF:
[h4 Member Functions]
-   uniform_distribution(RealType lower = 0, RealType upper = 1);
+   BOOST_MATH_GPU_ENABLED uniform_distribution(RealType lower = 0, RealType upper = 1);
Constructs a [@http://en.wikipedia.org/wiki/uniform_distribution
uniform distribution] with lower /lower/ (a) and upper /upper/ (b).
@@ -74,11 +74,11 @@ uniform distribution] with lower /lower/ (a) and upper /upper/ (b).
Requires that the /lower/ and /upper/ parameters are both finite;
otherwise if infinity or NaN then calls __domain_error.
-   RealType lower()const;
+   BOOST_MATH_GPU_ENABLED RealType lower()const;
Returns the /lower/ parameter of this distribution.
-   RealType upper()const;
+   BOOST_MATH_GPU_ENABLED RealType upper()const;
Returns the /upper/ parameter of this distribution.
@@ -86,6 +86,8 @@ Returns the /upper/ parameter of this distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is any finite value,
but the supported range is only /lower/ <= x <= /upper/.

diff --git a/doc/distributions/weibull.qbk b/doc/distributions/weibull.qbk
index 95c9e461e2..37139ab2c6 100644
--- a/doc/distributions/weibull.qbk
+++ b/doc/distributions/weibull.qbk
@@ -17,10 +17,10 @@
      typedef RealType value_type;
      typedef Policy policy_type;
      // Construct:
-     weibull_distribution(RealType shape, RealType scale = 1)
+     BOOST_MATH_GPU_ENABLED weibull_distribution(RealType shape, RealType scale = 1)
      // Accessors:
-     RealType shape()const;
-     RealType scale()const;
+     BOOST_MATH_GPU_ENABLED RealType shape()const;
+     BOOST_MATH_GPU_ENABLED RealType scale()const;
   };
   }} // namespaces
@@ -65,7 +65,7 @@ Samuel Kotz & Saralees Nadarajah].
[h4 Member Functions]
-   weibull_distribution(RealType shape, RealType scale = 1);
+   BOOST_MATH_GPU_ENABLED weibull_distribution(RealType shape, RealType scale = 1);
Constructs a [@http://en.wikipedia.org/wiki/Weibull_distribution
Weibull distribution] with shape /shape/ and scale /scale/.
@@ -73,11 +73,11 @@ Weibull distribution] with shape /shape/ and scale /scale/.
Requires that the /shape/ and /scale/ parameters are both greater than zero,
otherwise calls __domain_error.
-   RealType shape()const;
+   BOOST_MATH_GPU_ENABLED RealType shape()const;
Returns the /shape/ parameter of this distribution.
-   RealType scale()const;
+   BOOST_MATH_GPU_ENABLED RealType scale()const;
Returns the /scale/ parameter of this distribution.
@@ -85,9 +85,14 @@ Returns the /scale/ parameter of this distribution.
All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions]
that are generic to all distributions are supported: __usual_accessors.
+For this distribution all non-member accessor functions are marked with `BOOST_MATH_GPU_ENABLED` and can
+be run on both host and device.
The domain of the random variable is \[0, [infin]\].
+In this distribution the implementations of both `logcdf` and `logpdf` are specialized
+to improve numerical accuracy.
+
[h4 Accuracy]
The Weibull distribution is implemented in terms of the
@@ -104,7 +109,9 @@ and /q = 1-p/.
[table
[[Function][Implementation Notes]]
[[pdf][Using the relation: pdf = [alpha][beta][super -[alpha] ]x[super [alpha] - 1] e[super -(x/beta)[super alpha]] ]]
+[[logpdf][log(pdf) = log([alpha]) - [alpha] * log([beta]) + log(x) * ([alpha]-1) - pow(x/[beta], [alpha]) ]]
[[cdf][Using the relation: p = -__expm1(-(x\/[beta])[super [alpha]]) ]]
+[[logcdf][log(cdf) = log1p(-exp(-pow(x / [beta], [alpha]))) ]]
[[cdf complement][Using the relation: q = e[super -(x\/[beta])[super [alpha]]] ]]
[[quantile][Using the relation: x = [beta] * (-__log1p(-p))[super 1\/[alpha]] ]]
[[quantile from the complement][Using the relation: x = [beta] * (-log(q))[super 1\/[alpha]] ]]

diff --git a/doc/equations/holtsmark_ref1.svg b/doc/equations/holtsmark_ref1.svg
new file mode 100644 index 0000000000..77e97352ec
[new SVG equation image: markup not shown]

diff --git a/doc/equations/landau_ref1.svg b/doc/equations/landau_ref1.svg
new file mode 100644 index 0000000000..19939a829f
[new SVG equation image: markup not shown]

diff --git a/doc/equations/mapairy_ref1.svg b/doc/equations/mapairy_ref1.svg
new file mode 100644 index 0000000000..f0052a3bac
[new SVG equation image: markup not shown]

diff --git a/doc/equations/saspoint5_ref1.svg b/doc/equations/saspoint5_ref1.svg
new file mode 100644 index 0000000000..b013598e24
[new SVG equation image: markup not shown]

diff --git a/doc/graphs/holtsmark_ccdf_accuracy_64.png b/doc/graphs/holtsmark_ccdf_accuracy_64.png
new file mode 100644 index 0000000000..e86f37a551
Binary files /dev/null and b/doc/graphs/holtsmark_ccdf_accuracy_64.png differ

diff --git a/doc/graphs/holtsmark_pdf1.svg b/doc/graphs/holtsmark_pdf1.svg
new file mode 100644 index 0000000000..a06288261b
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/holtsmark_pdf2.svg b/doc/graphs/holtsmark_pdf2.svg
new file mode 100644 index 0000000000..420075c9bf
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/holtsmark_pdf_accuracy_64.png b/doc/graphs/holtsmark_pdf_accuracy_64.png
new file mode 100644 index 0000000000..fae6407447
Binary files /dev/null and b/doc/graphs/holtsmark_pdf_accuracy_64.png differ

diff --git a/doc/graphs/landau_ccdf_accuracy_64.png b/doc/graphs/landau_ccdf_accuracy_64.png
new file mode 100644 index 0000000000..7e157eb81f
Binary files /dev/null and b/doc/graphs/landau_ccdf_accuracy_64.png differ

diff --git a/doc/graphs/landau_cdf_accuracy_64.png b/doc/graphs/landau_cdf_accuracy_64.png
new file mode 100644 index 0000000000..3c4a48eaeb
Binary files /dev/null and b/doc/graphs/landau_cdf_accuracy_64.png differ

diff --git a/doc/graphs/landau_pdf1.svg b/doc/graphs/landau_pdf1.svg
new file mode 100644 index 0000000000..f07e248b15
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/landau_pdf2.svg b/doc/graphs/landau_pdf2.svg
new file mode 100644 index 0000000000..a3a6f7feb1
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/landau_pdf_accuracy_64.png b/doc/graphs/landau_pdf_accuracy_64.png
new file mode 100644 index 0000000000..3bb84efec8
Binary files /dev/null and b/doc/graphs/landau_pdf_accuracy_64.png differ

diff --git a/doc/graphs/mapairy_ccdf_accuracy_64.png b/doc/graphs/mapairy_ccdf_accuracy_64.png
new file mode 100644 index 0000000000..8303bf8753
Binary files /dev/null and b/doc/graphs/mapairy_ccdf_accuracy_64.png differ

diff --git a/doc/graphs/mapairy_cdf_accuracy_64.png b/doc/graphs/mapairy_cdf_accuracy_64.png
new file mode 100644 index 0000000000..0576655466
Binary files /dev/null and b/doc/graphs/mapairy_cdf_accuracy_64.png differ

diff --git a/doc/graphs/mapairy_pdf1.svg b/doc/graphs/mapairy_pdf1.svg
new file mode 100644 index 0000000000..1d240be465
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/mapairy_pdf2.svg b/doc/graphs/mapairy_pdf2.svg
new file mode 100644 index 0000000000..c77c269bed
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/mapairy_pdf_accuracy_64.png b/doc/graphs/mapairy_pdf_accuracy_64.png
new file mode 100644 index 0000000000..45b1a1e828
Binary files /dev/null and b/doc/graphs/mapairy_pdf_accuracy_64.png differ

diff --git a/doc/graphs/saspoint5_ccdf_accuracy_64.png b/doc/graphs/saspoint5_ccdf_accuracy_64.png
new file mode 100644 index 0000000000..a1036160f4
Binary files /dev/null and b/doc/graphs/saspoint5_ccdf_accuracy_64.png differ

diff --git a/doc/graphs/saspoint5_pdf1.svg b/doc/graphs/saspoint5_pdf1.svg
new file mode 100644 index 0000000000..7c820ae225
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/saspoint5_pdf2.svg b/doc/graphs/saspoint5_pdf2.svg
new file mode 100644 index 0000000000..dbd503ce7f
[new SVG graph image (Matplotlib v3.9.0): markup not shown]

diff --git a/doc/graphs/saspoint5_pdf_accuracy_64.png b/doc/graphs/saspoint5_pdf_accuracy_64.png
new file mode 100644 index 0000000000..e1ecd8f509
Binary files /dev/null and b/doc/graphs/saspoint5_pdf_accuracy_64.png differ

diff --git a/doc/html/math_toolkit/dist_ref/dists/cauchy_dist.html b/doc/html/math_toolkit/dist_ref/dists/cauchy_dist.html
index 9328fa44b9..79f041a239 100644
---
a/doc/html/math_toolkit/dist_ref/dists/cauchy_dist.html +++ b/doc/html/math_toolkit/dist_ref/dists/cauchy_dist.html @@ -221,7 +221,7 @@
Substituting into the above we get:

-              p = -atan(1/x) ; x < 0
+              p = -atan(1/x)/π ; x < 0
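[Note, not part of the patch: the corrected line can be checked numerically. For the standard Cauchy distribution F(x) = 1/2 + atan(x)/π, and the identity atan(x) + atan(1/x) = -π/2 for x < 0 gives F(x) = -atan(1/x)/π exactly. A minimal host-side sketch:]

    #include <boost/math/distributions/cauchy.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        const boost::math::cauchy_distribution<double> dist; // location 0, scale 1
        const double pi = 3.141592653589793;
        for (double x : {-0.5, -2.0, -10.0})
        {
            const double p = -std::atan(1.0 / x) / pi; // corrected formula, valid for x < 0
            std::cout << p - cdf(dist, x) << '\n';     // differences at rounding level
        }
    }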

So the procedure is to calculate the cdf for -fabs(x) using the
diff --git a/doc/math.qbk b/doc/math.qbk
index 4b8804dbbb..385c93a5e8 100644
--- a/doc/math.qbk
+++ b/doc/math.qbk
@@ -424,7 +424,7 @@ and use the function's name as the link text.]
 
 [def __usual_accessors __cdf, __pdf, __quantile, __hazard,
-   __chf, __mean, __median, __mode, __variance, __sd, __skewness,
+   __chf, __logcdf, __logpdf, __mean, __median, __mode, __variance, __sd, __skewness,
    __kurtosis, __kurtosis_excess, __range and __support]
 
 [def __real_concept [link math_toolkit.real_concepts real concept]]
@@ -557,6 +557,7 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22.
 [include overview/standalone.qbk]
 [include overview/result_type_calc.qbk]
 [include overview/error_handling.qbk]
+[include overview/gpu.qbk]
 
 [section:compilers_overview Compilers]
 [compilers_overview]
diff --git a/doc/overview/gpu.qbk b/doc/overview/gpu.qbk
new file mode 100644
index 0000000000..7fb27e645e
--- /dev/null
+++ b/doc/overview/gpu.qbk
@@ -0,0 +1,67 @@
+[section:gpu Support for GPU programming in Boost.Math]
+
+[h4 GPU Support]
+
+Selected functions, distributions, tools, etc. support running on both host and device.
+These functions are annotated with `BOOST_MATH_GPU_ENABLED` or `BOOST_MATH_CUDA_ENABLED` in their individual documentation.
+Functions marked with `BOOST_MATH_GPU_ENABLED` are tested using CUDA (both NVCC and NVRTC) as well as SYCL, to provide a wide range of support.
+A small number of functions are marked with `BOOST_MATH_CUDA_ENABLED`; because of restrictions in their implementations, SYCL is unsupported for these.
+
+[h4 Policies]
+
+Because exceptions cannot be thrown on device, the default policy on all devices is to ignore errors.
+A user can specify their own policy as usual, but it will be ignored when the code is run on device.
+
+[h4 How to build with device support]
+
+When compiling with CUDA or SYCL you will have to ensure that your code is run inside a kernel function.
+It is not enough to simply compile existing code with the NVCC compiler to run it on the device.
+A simple CUDA kernel that runs the Beta distribution CDF under NVCC would be:
+
+    __global__ void cuda_beta_dist(const double* in, double* out, int num_elements)
+    {
+        const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+        if (i < num_elements)
+        {
+            out[i] = cdf(boost::math::beta_distribution<double>(), in[i]);
+        }
+    }
+
+And with CUDA on NVRTC:
+
+    const char* cuda_kernel = R"(
+    #include <boost/math/distributions/beta.hpp>
+    extern "C" __global__
+    void test_beta_dist_kernel(const double* in, double* out, int num_elements)
+    {
+        const int i = blockDim.x * blockIdx.x + threadIdx.x;
+        if (i < num_elements)
+        {
+            out[i] = boost::math::cdf(boost::math::beta_distribution<double>(), in[i]);
+        }
+    }
+    )";
+
+And lastly on SYCL:
+
+    void sycl_beta_dist(const double* in, double* out, int num_elements, sycl::queue& q)
+    {
+        q.submit([&](sycl::handler& h) {
+            h.parallel_for(sycl::range<1>(num_elements), [=](sycl::id<1> i) {
+                out[i] = boost::math::cdf(boost::math::beta_distribution<double>(), in[i]);
+            });
+        });
+    }
+
+Once your kernel function has been written, use your framework's usual mechanism to launch it.
+
+[endsect] [/section:gpu Support for GPU programming in Boost.Math]
+
+[/
+  Copyright 2024. Matt Borland
+  Distributed under the Boost Software License, Version 1.0.
+  (See accompanying file LICENSE_1_0.txt or copy at
+  http://www.boost.org/LICENSE_1_0.txt).
+]
+
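[Note, not part of the patch: once the `cuda_beta_dist` kernel above is compiled with NVCC, a host-side launch might look like the following minimal sketch; the buffer names and launch dimensions are arbitrary:]

    #include <cuda_runtime.h>

    void run_beta_cdf(const double* host_in, double* host_out, int n)
    {
        double *d_in, *d_out;
        cudaMalloc(&d_in, n * sizeof(double));
        cudaMalloc(&d_out, n * sizeof(double));
        cudaMemcpy(d_in, host_in, n * sizeof(double), cudaMemcpyHostToDevice);

        // One thread per element; round the grid size up
        const int block = 256;
        const int grid = (n + block - 1) / block;
        cuda_beta_dist<<<grid, block>>>(d_in, d_out, n);

        cudaMemcpy(host_out, d_out, n * sizeof(double), cudaMemcpyDeviceToHost);
        cudaFree(d_in);
        cudaFree(d_out);
    }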
diff --git a/doc/quadrature/double_exponential.qbk b/doc/quadrature/double_exponential.qbk
index b4649adbc6..2959b94cd3 100644
--- a/doc/quadrature/double_exponential.qbk
+++ b/doc/quadrature/double_exponential.qbk
@@ -1,5 +1,6 @@
 [/
 Copyright (c) 2017 Nick Thompson
+Copyright (c) 2024 Matt Borland
 Use, modification and distribution are subject to the
 Boost Software License, Version 1.0. (See accompanying file
 LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -538,6 +539,30 @@ This form integrates just fine over (-log([pi]/2), +[infin]) using either the `t
 
 [endsect] [/section:de_caveats Caveats]
 
+[section:gpu_usage GPU Usage]
+
+``
+    #include <boost/math/quadrature/exp_sinh.hpp>
+
+    namespace boost{ namespace math{ namespace quadrature {
+
+    template <class F, class Real, class Policy = policies::policy<> >
+    __device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
+
+    template <class F, class Real, class Policy = policies::policy<> >
+    __device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
+
+}}}
+``
+
+Quadrature is additionally able to run on CUDA (NVCC and NVRTC) platforms.
+The major difference is outlined in the above function signatures:
+on device these are free-standing functions rather than member functions of an integrator object, as they are on the host.
+The tables of abscissas and weights are stored in shared read-only memory on the device, instead of being initialized when the class is constructed.
+An example use case would be computing a stiffness matrix in the finite element method, since that involves integrating many different functions.
+
+[endsect] [/section:gpu_usage GPU Usage]
+
 [section:de_refes References]
 
 * Hidetosi Takahasi and Masatake Mori, ['Double Exponential Formulas for Numerical Integration]
 Publ. Res. Inst. Math. Sci., 9 (1974), pp. 721-741.
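[Note, not part of the patch: a minimal sketch of how the device-side `exp_sinh_integrate` overload above might be called from a CUDA kernel. The kernel name, integrand, and tolerance are hypothetical:]

    __global__ void integrate_kernel(double* out, int num_elements)
    {
        const int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < num_elements)
        {
            // Integrand exp(-t^2); the overload without endpoints integrates
            // over the half-infinite interval (0, +inf).
            auto f = [](double t) { return exp(-t * t); };
            double error;
            double L1;
            boost::math::size_t levels;
            out[i] = boost::math::quadrature::exp_sinh_integrate(f, 1e-9, &error, &L1, &levels);
        }
    }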
diff --git a/doc/roots/roots.qbk b/doc/roots/roots.qbk
index a229300690..ea347639b9 100644
--- a/doc/roots/roots.qbk
+++ b/doc/roots/roots.qbk
@@ -1,4 +1,4 @@
-[section:roots_deriv Root Finding With Derivatives: Newton-Raphson, Halley & Schr'''ö'''der]
+[section:roots_deriv Root Finding With Derivatives: Newton-Raphson, Halley & Schroeder]
 
 [h4 Synopsis]
 
@@ -10,10 +10,10 @@ namespace tools { // Note namespace boost::math::tools.
    // Newton-Raphson
    template <class F, class T>
-   T newton_raphson_iterate(F f, T guess, T min, T max, int digits);
+   BOOST_MATH_GPU_ENABLED T newton_raphson_iterate(F f, T guess, T min, T max, int digits);
 
    template <class F, class T>
-   T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& max_iter);
+   BOOST_MATH_GPU_ENABLED T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& max_iter);
 
    // Halley
    template <class F, class T>
@@ -22,7 +22,7 @@
    T halley_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& max_iter);
 
-   // Schr'''ö'''der
+   // Schroeder
    template <class F, class T>
    T schroder_iterate(F f, T guess, T min, T max, int digits);
 
@@ -61,7 +61,7 @@ For second-order iterative method ([@http://en.wikipedia.org/wiki/Newton_Raphson
 For the third-order methods ([@http://en.wikipedia.org/wiki/Halley%27s_method Halley] and
-Schr'''ö'''der)
+Schroeder)
 the `tuple` should have [*three] elements containing the evaluation of the function
 and its first and second derivatives.]]
 [[T guess] [The initial starting value. A good guess is crucial to quick convergence!]]
@@ -147,7 +147,7 @@ Out of bounds steps revert to bisection of the current bounds.
 
 Under ideal conditions, the number of correct digits trebles with each iteration.
 
-[h4:schroder Schr'''ö'''der's Method]
+[h4:schroder Schroeder's Method]
 
 Given an initial guess x0 the subsequent values are computed using:
@@ -162,8 +162,8 @@ Out of bounds steps revert to __bisection_wikipedia of the current bounds.
 
 Under ideal conditions, the number of correct digits trebles with each iteration.
 
-This is Schr'''ö'''der's general result (equation 18 from [@http://drum.lib.umd.edu/handle/1903/577 Stewart, G. W.
-"On Infinitely Many Algorithms for Solving Equations." English translation of Schr'''ö'''der's original paper.
+This is Schroeder's general result (equation 18 from [@http://drum.lib.umd.edu/handle/1903/577 Stewart, G. W.
+"On Infinitely Many Algorithms for Solving Equations." English translation of Schroeder's original paper.
 College Park, MD: University of Maryland, Institute for Advanced Computer Studies, Department of Computer Science, 1993].)
 This method guarantees at least quadratic convergence (the same as Newton's method), and is known
 to work well in the presence of multiple roots:
diff --git a/doc/sf/airy.qbk b/doc/sf/airy.qbk
index 5ff4c7cb5e..4756bee2d8 100644
--- a/doc/sf/airy.qbk
+++ b/doc/sf/airy.qbk
@@ -18,10 +18,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 namespace boost { namespace math {
 
 template <class T>
-   ``__sf_result`` airy_ai(T x);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_ai(T x);
 
 template <class T, class Policy>
-   ``__sf_result`` airy_ai(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_ai(T x, const Policy&);
 
 }} // namespaces
@@ -78,10 +78,10 @@ This function is implemented in terms of the Bessel functions using the relation
 namespace boost { namespace math {
 
 template <class T>
-   ``__sf_result`` airy_bi(T x);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_bi(T x);
 
 template <class T, class Policy>
-   ``__sf_result`` airy_bi(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_bi(T x, const Policy&);
 
 }} // namespaces
@@ -132,10 +132,10 @@ This function is implemented in terms of the Bessel functions using the relation
 namespace boost { namespace math {
 
 template <class T>
-   ``__sf_result`` airy_ai_prime(T x);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_ai_prime(T x);
 
 template <class T, class Policy>
-   ``__sf_result`` airy_ai_prime(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_ai_prime(T x, const Policy&);
 
 }} // namespaces
@@ -186,10 +186,10 @@ This function is implemented in terms of the Bessel functions using the relation
 namespace boost { namespace math {
 
 template <class T>
-   ``__sf_result`` airy_bi_prime(T x);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_bi_prime(T x);
 
 template <class T, class Policy>
-   ``__sf_result`` airy_bi_prime(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED ``__sf_result`` airy_bi_prime(T x, const Policy&);
 
 }} // namespaces
@@ -242,23 +242,23 @@ by providing an output iterator.
 
 The signature of the single value functions are:
 
 template <class T>
-   T airy_ai_zero(
+   BOOST_MATH_GPU_ENABLED T airy_ai_zero(
          int m); // 1-based index of zero.
 
 template <class T>
-   T airy_bi_zero(
+   BOOST_MATH_GPU_ENABLED T airy_bi_zero(
         int m); // 1-based index of zero.
 
 and for multiple zeros:
 
 template <class T, class OutputIterator>
-   OutputIterator airy_ai_zero(
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_ai_zero(
                      int start_index, // 1-based index of first zero.
                      unsigned number_of_zeros, // How many zeros to generate.
                      OutputIterator out_it); // Destination for zeros.
 
 template <class T, class OutputIterator>
-   OutputIterator airy_bi_zero(
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_bi_zero(
                      int start_index, // 1-based index of zero.
                      unsigned number_of_zeros, // How many zeros to generate
                      OutputIterator out_it); // Destination for zeros.
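[Note, not part of the patch: the zero-finding interface above is unchanged apart from the annotations; for reference, a minimal host-side usage sketch:]

    #include <boost/math/special_functions/airy.hpp>
    #include <iostream>
    #include <iterator>

    int main()
    {
        // First zero of Ai, approximately -2.33811
        std::cout << boost::math::airy_ai_zero<double>(1) << '\n';

        // First five zeros of Bi, written straight to stdout
        boost::math::airy_bi_zero<double>(1, 5, std::ostream_iterator<double>(std::cout, "\n"));
    }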
@@ -266,25 +266,25 @@ and for multiple zeros: There are also versions which allow control of the __policy_section for error handling and precision. template - T airy_ai_zero( + BOOST_MATH_GPU_ENABLED T airy_ai_zero( int m, // 1-based index of zero. const Policy&); // Policy to use. template - T airy_bi_zero( + BOOST_MATH_GPU_ENABLED T airy_bi_zero( int m, // 1-based index of zero. const Policy&); // Policy to use. template - OutputIterator airy_ai_zero( + BOOST_MATH_GPU_ENABLED OutputIterator airy_ai_zero( int start_index, // 1-based index of first zero. unsigned number_of_zeros, // How many zeros to generate. OutputIterator out_it, // Destination for zeros. const Policy& pol); // Policy to use. template - OutputIterator airy_bi_zero( + BOOST_MATH_GPU_ENABLED OutputIterator airy_bi_zero( int start_index, // 1-based index of zero. unsigned number_of_zeros, // How many zeros to generate. OutputIterator out_it, // Destination for zeros. diff --git a/doc/sf/bessel_ik.qbk b/doc/sf/bessel_ik.qbk index d044ac7b80..9fa4e63a74 100644 --- a/doc/sf/bessel_ik.qbk +++ b/doc/sf/bessel_ik.qbk @@ -5,16 +5,16 @@ `#include ` template - ``__sf_result`` cyl_bessel_i(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_bessel_i(T1 v, T2 x); template - ``__sf_result`` cyl_bessel_i(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_bessel_i(T1 v, T2 x, const ``__Policy``&); template - ``__sf_result`` cyl_bessel_k(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_bessel_k(T1 v, T2 x); template - ``__sf_result`` cyl_bessel_k(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_bessel_k(T1 v, T2 x, const ``__Policy``&); [h4 Description] diff --git a/doc/sf/bessel_jy.qbk b/doc/sf/bessel_jy.qbk index 1f43bc7580..faf8788500 100644 --- a/doc/sf/bessel_jy.qbk +++ b/doc/sf/bessel_jy.qbk @@ -5,16 +5,16 @@ `#include ` template - ``__sf_result`` cyl_bessel_j(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_bessel_j(T1 v, T2 x); template - ``__sf_result`` cyl_bessel_j(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_bessel_j(T1 v, T2 x, const ``__Policy``&); template - ``__sf_result`` cyl_neumann(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_neumann(T1 v, T2 x); template - ``__sf_result`` cyl_neumann(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` cyl_neumann(T1 v, T2 x, const ``__Policy``&); [h4 Description] diff --git a/doc/sf/bessel_spherical.qbk b/doc/sf/bessel_spherical.qbk index e9cda89c70..eb1fa69154 100644 --- a/doc/sf/bessel_spherical.qbk +++ b/doc/sf/bessel_spherical.qbk @@ -5,16 +5,16 @@ `#include ` template - ``__sf_result`` sph_bessel(unsigned v, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sph_bessel(unsigned v, T2 x); template - ``__sf_result`` sph_bessel(unsigned v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sph_bessel(unsigned v, T2 x, const ``__Policy``&); template - ``__sf_result`` sph_neumann(unsigned v, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sph_neumann(unsigned v, T2 x); template - ``__sf_result`` sph_neumann(unsigned v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sph_neumann(unsigned v, T2 x, const ``__Policy``&); [h4 Description] diff --git a/doc/sf/beta.qbk b/doc/sf/beta.qbk index e332fa5030..7e1904c254 100644 --- a/doc/sf/beta.qbk +++ b/doc/sf/beta.qbk @@ -9,10 +9,10 @@ namespace boost{ namespace math{ template - ``__sf_result`` beta(T1 a, T2 b); + BOOST_MATH_GPU_ENABLED ``__sf_result`` beta(T1 
a, T2 b); template - ``__sf_result`` beta(T1 a, T2 b, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` beta(T1 a, T2 b, const ``__Policy``&); }} // namespaces diff --git a/doc/sf/beta_derivative.qbk b/doc/sf/beta_derivative.qbk index 8606d6f2b3..5d3b9a13ef 100644 --- a/doc/sf/beta_derivative.qbk +++ b/doc/sf/beta_derivative.qbk @@ -9,10 +9,10 @@ namespace boost{ namespace math{ template - ``__sf_result`` ibeta_derivative(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_derivative(T1 a, T2 b, T3 x); template - ``__sf_result`` ibeta_derivative(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_derivative(T1 a, T2 b, T3 x, const ``__Policy``&); }} // namespaces diff --git a/doc/sf/digamma.qbk b/doc/sf/digamma.qbk index c88c5fe7b0..78b68403d8 100644 --- a/doc/sf/digamma.qbk +++ b/doc/sf/digamma.qbk @@ -9,10 +9,10 @@ namespace boost{ namespace math{ template - ``__sf_result`` digamma(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` digamma(T z); template - ``__sf_result`` digamma(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` digamma(T z, const ``__Policy``&); }} // namespaces diff --git a/doc/sf/ellint_carlson.qbk b/doc/sf/ellint_carlson.qbk index ca39cd6bef..db45697463 100644 --- a/doc/sf/ellint_carlson.qbk +++ b/doc/sf/ellint_carlson.qbk @@ -17,10 +17,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) namespace boost { namespace math { template - ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z) template - ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z, const ``__Policy``&) }} // namespaces @@ -32,10 +32,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) namespace boost { namespace math { template - ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z) template - ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z, const ``__Policy``&) }} // namespaces @@ -47,10 +47,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) namespace boost { namespace math { template - ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p) template - ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p, const ``__Policy``&) }} // namespaces @@ -62,10 +62,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) namespace boost { namespace math { template - ``__sf_result`` ellint_rc(T1 x, T2 y) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rc(T1 x, T2 y) template - ``__sf_result`` ellint_rc(T1 x, T2 y, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rc(T1 x, T2 y, const ``__Policy``&) }} // namespaces @@ -76,10 +76,10 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) namespace boost { namespace math { template - ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z) template - ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z, const ``__Policy``&) }} // namespaces @@ -98,10 +98,10 @@ when the arguments are of different types: otherwise the 
return is the same type as the arguments. template - ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z) template - ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rf(T1 x, T2 y, T3 z, const ``__Policy``&) Returns Carlson's Elliptic Integral ['R[sub F]]: @@ -113,10 +113,10 @@ one may be zero. Otherwise returns the result of __domain_error. [optional_policy] template - ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z) template - ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rd(T1 x, T2 y, T3 z, const ``__Policy``&) Returns Carlson's elliptic integral R[sub D]: @@ -128,10 +128,10 @@ zero, and that z >= 0. Otherwise returns the result of __domain_error. [optional_policy] template - ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p) template - ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rj(T1 x, T2 y, T3 z, T4 p, const ``__Policy``&) Returns Carlson's elliptic integral R[sub J]: @@ -149,10 +149,10 @@ using the relation: [equation ellint17] template - ``__sf_result`` ellint_rc(T1 x, T2 y) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rc(T1 x, T2 y) template - ``__sf_result`` ellint_rc(T1 x, T2 y, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rc(T1 x, T2 y, const ``__Policy``&) Returns Carlson's elliptic integral R[sub C]: @@ -170,10 +170,10 @@ using the relation: [equation ellint18] template - ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z) template - ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z, const ``__Policy``&) + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_rg(T1 x, T2 y, T3 z, const ``__Policy``&) Returns Carlson's elliptic integral ['R[sub G]:] diff --git a/doc/sf/ellint_legendre.qbk b/doc/sf/ellint_legendre.qbk index c780a9b019..50b633af9f 100644 --- a/doc/sf/ellint_legendre.qbk +++ b/doc/sf/ellint_legendre.qbk @@ -17,16 +17,16 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) namespace boost { namespace math { template - ``__sf_result`` ellint_1(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T1 k, T2 phi); template - ``__sf_result`` ellint_1(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T1 k, T2 phi, const ``__Policy``&); template - ``__sf_result`` ellint_1(T k); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T k); template - ``__sf_result`` ellint_1(T k, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T k, const ``__Policy``&); }} // namespaces @@ -42,10 +42,10 @@ when T1 and T2 are different types: when they are the same type then the result is the same type as the arguments. 
template - ``__sf_result`` ellint_1(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T1 k, T2 phi); template - ``__sf_result`` ellint_1(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T1 k, T2 phi, const ``__Policy``&); Returns the incomplete elliptic integral of the first kind ['F([phi], k)]: @@ -56,10 +56,10 @@ Requires k[super 2]sin[super 2](phi) < 1, otherwise returns the result of __doma [optional_policy] template - ``__sf_result`` ellint_1(T k); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T k); template - ``__sf_result`` ellint_1(T k, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_1(T k, const ``__Policy``&); Returns the complete elliptic integral of the first kind ['K(k)]: @@ -123,16 +123,16 @@ and namespace boost { namespace math { template - ``__sf_result`` ellint_2(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T1 k, T2 phi); template - ``__sf_result`` ellint_2(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T1 k, T2 phi, const ``__Policy``&); template - ``__sf_result`` ellint_2(T k); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T k); template - ``__sf_result`` ellint_2(T k, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T k, const ``__Policy``&); }} // namespaces @@ -148,10 +148,10 @@ when T1 and T2 are different types: when they are the same type then the result is the same type as the arguments. template - ``__sf_result`` ellint_2(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T1 k, T2 phi); template - ``__sf_result`` ellint_2(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T1 k, T2 phi, const ``__Policy``&); Returns the incomplete elliptic integral of the second kind ['E([phi], k)]: @@ -162,10 +162,10 @@ Requires k[super 2]sin[super 2](phi) < 1, otherwise returns the result of __doma [optional_policy] template - ``__sf_result`` ellint_2(T k); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T k); template - ``__sf_result`` ellint_2(T k, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_2(T k, const ``__Policy``&); Returns the complete elliptic integral of the second kind ['E(k)]: @@ -230,16 +230,16 @@ and namespace boost { namespace math { template - ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi); template - ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi, const ``__Policy``&); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi, const ``__Policy``&); template - ``__sf_result`` ellint_3(T1 k, T2 n); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n); template - ``__sf_result`` ellint_3(T1 k, T2 n, const ``__Policy``&); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, const ``__Policy``&); }} // namespaces @@ -255,10 +255,10 @@ when the arguments are of different types: when they are the same type then the is the same type as the arguments. template - ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi); template - ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi, const ``__Policy``&); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, T3 phi, const ``__Policy``&); Returns the incomplete elliptic integral of the third kind ['[Pi](n, [phi], k)]: @@ -271,10 +271,10 @@ would be complex). 
[optional_policy] template - ``__sf_result`` ellint_3(T1 k, T2 n); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n); template - ``__sf_result`` ellint_3(T1 k, T2 n, const ``__Policy``&); + BOOST_MATH_CUDA_ENABLED ``__sf_result`` ellint_3(T1 k, T2 n, const ``__Policy``&); Returns the complete elliptic integral of the first kind ['[Pi](n, k)]: @@ -355,16 +355,16 @@ and namespace boost { namespace math { template - ``__sf_result`` ellint_d(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, T2 phi); template - ``__sf_result`` ellint_d(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, T2 phi, const ``__Policy``&); template - ``__sf_result`` ellint_d(T1 k); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k); template - ``__sf_result`` ellint_d(T1 k, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, const ``__Policy``&); }} // namespaces @@ -378,10 +378,10 @@ when the arguments are of different types: when they are the same type then the is the same type as the arguments. template - ``__sf_result`` ellint_d(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, T2 phi); template - ``__sf_result`` ellint_3(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_3(T1 k, T2 phi, const ``__Policy``&); Returns the incomplete elliptic integral: @@ -394,10 +394,10 @@ would be complex). [optional_policy] template - ``__sf_result`` ellint_d(T1 k); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k); template - ``__sf_result`` ellint_d(T1 k, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ellint_d(T1 k, const ``__Policy``&); Returns the complete elliptic integral ['D(k) = D([pi]/2, k)] @@ -463,10 +463,10 @@ using the relation: namespace boost { namespace math { template - ``__sf_result`` jacobi_zeta(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` jacobi_zeta(T1 k, T2 phi); template - ``__sf_result`` jacobi_zeta(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` jacobi_zeta(T1 k, T2 phi, const ``__Policy``&); }} // namespaces @@ -543,10 +543,10 @@ is [@../../example/jacobi_zeta_example.cpp jacobi_zeta_example.cpp]. namespace boost { namespace math { template - ``__sf_result`` heuman_lambda(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED ``__sf_result`` heuman_lambda(T1 k, T2 phi); template - ``__sf_result`` heuman_lambda(T1 k, T2 phi, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` heuman_lambda(T1 k, T2 phi, const ``__Policy``&); }} // namespaces diff --git a/doc/sf/erf.qbk b/doc/sf/erf.qbk index 3207b66c07..5f6bdf9fa5 100644 --- a/doc/sf/erf.qbk +++ b/doc/sf/erf.qbk @@ -9,16 +9,16 @@ namespace boost{ namespace math{ template - ``__sf_result`` erf(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf(T z); template - ``__sf_result`` erf(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf(T z, const ``__Policy``&); template - ``__sf_result`` erfc(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc(T z); template - ``__sf_result`` erfc(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc(T z, const ``__Policy``&); }} // namespaces @@ -30,10 +30,10 @@ the return type is `double` if T is an integer type, and T otherwise. 
[h4 Description] template - ``__sf_result`` erf(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf(T z); template - ``__sf_result`` erf(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf(T z, const ``__Policy``&); Returns the [@http://en.wikipedia.org/wiki/Error_function error function] [@http://functions.wolfram.com/GammaBetaErf/Erf/ erf] of z: @@ -43,10 +43,10 @@ Returns the [@http://en.wikipedia.org/wiki/Error_function error function] [graph erf] template - ``__sf_result`` erfc(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc(T z); template - ``__sf_result`` erfc(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc(T z, const ``__Policy``&); Returns the complement of the [@http://functions.wolfram.com/GammaBetaErf/Erfc/ error function] of z: diff --git a/doc/sf/erf_inv.qbk b/doc/sf/erf_inv.qbk index 729ec22d28..e8f7464e09 100644 --- a/doc/sf/erf_inv.qbk +++ b/doc/sf/erf_inv.qbk @@ -9,16 +9,16 @@ namespace boost{ namespace math{ template - ``__sf_result`` erf_inv(T p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf_inv(T p); template - ``__sf_result`` erf_inv(T p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf_inv(T p, const ``__Policy``&); template - ``__sf_result`` erfc_inv(T p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc_inv(T p); template - ``__sf_result`` erfc_inv(T p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc_inv(T p, const ``__Policy``&); }} // namespaces @@ -30,10 +30,10 @@ the return type is `double` if T is an integer type, and T otherwise. [h4 Description] template - ``__sf_result`` erf_inv(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf_inv(T z); template - ``__sf_result`` erf_inv(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erf_inv(T z, const ``__Policy``&); Returns the [@http://functions.wolfram.com/GammaBetaErf/InverseErf/ inverse error function] of z, that is a value x such that: @@ -43,10 +43,10 @@ of z, that is a value x such that: [graph erf_inv] template - ``__sf_result`` erfc_inv(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc_inv(T z); template - ``__sf_result`` erfc_inv(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` erfc_inv(T z, const ``__Policy``&); Returns the inverse of the complement of the error function of z, that is a value x such that: diff --git a/doc/sf/expint.qbk b/doc/sf/expint.qbk index 89554730d5..f0abf090e7 100644 --- a/doc/sf/expint.qbk +++ b/doc/sf/expint.qbk @@ -11,10 +11,10 @@ namespace boost{ namespace math{ template - ``__sf_result`` expint(unsigned n, T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(unsigned n, T z); template - ``__sf_result`` expint(unsigned n, T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(unsigned n, T z, const ``__Policy``&); }} // namespaces @@ -26,10 +26,10 @@ the return type is `double` if T is an integer type, and T otherwise. [h4 Description] template - ``__sf_result`` expint(unsigned n, T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(unsigned n, T z); template - ``__sf_result`` expint(unsigned n, T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(unsigned n, T z, const ``__Policy``&); Returns the [@http://mathworld.wolfram.com/En-Function.html exponential integral En] of z: @@ -100,10 +100,10 @@ is used. 
namespace boost{ namespace math{ template - ``__sf_result`` expint(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(T z); template - ``__sf_result`` expint(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(T z, const ``__Policy``&); }} // namespaces @@ -115,10 +115,10 @@ the return type is `double` if T is an integer type, and T otherwise. [h4 Description] template - ``__sf_result`` expint(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(T z); template - ``__sf_result`` expint(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` expint(T z, const ``__Policy``&); Returns the [@http://mathworld.wolfram.com/ExponentialIntegral.html exponential integral] of z: diff --git a/doc/sf/gamma_derivatives.qbk b/doc/sf/gamma_derivatives.qbk index c7dd248799..1b578d8d98 100644 --- a/doc/sf/gamma_derivatives.qbk +++ b/doc/sf/gamma_derivatives.qbk @@ -9,10 +9,10 @@ namespace boost{ namespace math{ template - ``__sf_result`` gamma_p_derivative(T1 a, T2 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_derivative(T1 a, T2 x); template - ``__sf_result`` gamma_p_derivative(T1 a, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_derivative(T1 a, T2 x, const ``__Policy``&); }} // namespaces diff --git a/doc/sf/gamma_ratios.qbk b/doc/sf/gamma_ratios.qbk index a3fcf864cb..0d076890d3 100644 --- a/doc/sf/gamma_ratios.qbk +++ b/doc/sf/gamma_ratios.qbk @@ -7,26 +7,26 @@ namespace boost{ namespace math{ template - ``__sf_result`` tgamma_ratio(T1 a, T2 b); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_ratio(T1 a, T2 b); template - ``__sf_result`` tgamma_ratio(T1 a, T2 b, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_ratio(T1 a, T2 b, const ``__Policy``&); template - ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta); template - ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta, const ``__Policy``&); }} // namespaces [h4 Description] template - ``__sf_result`` tgamma_ratio(T1 a, T2 b); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_ratio(T1 a, T2 b); template - ``__sf_result`` tgamma_ratio(T1 a, T2 b, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_ratio(T1 a, T2 b, const ``__Policy``&); Returns the ratio of gamma functions: @@ -37,10 +37,10 @@ Returns the ratio of gamma functions: Internally this just calls `tgamma_delta_ratio(a, b-a)`. 
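[Note, not part of the patch: a quick numerical illustration of the two ratio functions and the relation between them. Γ(5)/Γ(3) = 24/2 = 12:]

    #include <boost/math/special_functions/gamma.hpp>
    #include <iostream>

    int main()
    {
        std::cout << boost::math::tgamma_ratio(5.0, 3.0) << '\n';        // Γ(5)/Γ(3) = 12
        std::cout << boost::math::tgamma_delta_ratio(5.0, -2.0) << '\n'; // Γ(5)/Γ(5 + (-2)) = 12
    }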
template - ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta); template - ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_delta_ratio(T1 a, T2 delta, const ``__Policy``&); Returns the ratio of gamma functions: diff --git a/doc/sf/gegenbauer.qbk b/doc/sf/gegenbauer.qbk index a6afc53d82..69671917c8 100644 --- a/doc/sf/gegenbauer.qbk +++ b/doc/sf/gegenbauer.qbk @@ -16,13 +16,13 @@ namespace boost{ namespace math{ template - Real gegenbauer(unsigned n, Real lambda, Real x); + BOOST_MATH_GPU_ENABLED Real gegenbauer(unsigned n, Real lambda, Real x); template - Real gegenbauer_prime(unsigned n, Real lambda, Real x); + BOOST_MATH_GPU_ENABLED Real gegenbauer_prime(unsigned n, Real lambda, Real x); template - Real gegenbauer_derivative(unsigned n, Real lambda, Real x, unsigned k); + BOOST_MATH_GPU_ENABLED Real gegenbauer_derivative(unsigned n, Real lambda, Real x, unsigned k); }} // namespaces diff --git a/doc/sf/hankel.qbk b/doc/sf/hankel.qbk index 4d8a5eda1e..05d65201b1 100644 --- a/doc/sf/hankel.qbk +++ b/doc/sf/hankel.qbk @@ -3,18 +3,36 @@ [h4 Synopsis] + #if !defined(__CUDACC__) && !defined(__CUDACC_RTC__) + template - std::complex<``__sf_result``> cyl_hankel_1(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> cyl_hankel_1(T1 v, T2 x); template - std::complex<``__sf_result``> cyl_hankel_1(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> cyl_hankel_1(T1 v, T2 x, const ``__Policy``&); template - std::complex<``__sf_result``> cyl_hankel_2(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> cyl_hankel_2(T1 v, T2 x); template - std::complex<``__sf_result``> cyl_hankel_2(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> cyl_hankel_2(T1 v, T2 x, const ``__Policy``&); + #else // When using cuda we use namespace cuda::std:: instead of std:: + + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> cyl_hankel_1(T1 v, T2 x); + + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> cyl_hankel_1(T1 v, T2 x, const ``__Policy``&); + + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> cyl_hankel_2(T1 v, T2 x); + + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> cyl_hankel_2(T1 v, T2 x, const ``__Policy``&); + + #endif + [h4 Description] @@ -77,18 +95,35 @@ routines for integer order are used. 
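[Note, not part of the patch: a minimal sketch of what the `cuda::std::complex` return type above means in practice. Under NVCC/NVRTC the result of `cyl_hankel_1` is a `cuda::std::complex`, so a kernel would typically split it into real and imaginary parts; the kernel name and buffers here are hypothetical:]

    #include <boost/math/special_functions/hankel.hpp>
    #include <cuda/std/complex>

    __global__ void hankel_kernel(const double* in, double* out_re, double* out_im, int n)
    {
        const int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < n)
        {
            auto h = boost::math::cyl_hankel_1(1.0, in[i]); // cuda::std::complex<double> on device
            out_re[i] = h.real();
            out_im[i] = h.imag();
        }
    }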
[h4 Synopsis] + #if !defined(__CUDACC__) && !defined(__CUDACC_RTC__) + template - std::complex<``__sf_result``> sph_hankel_1(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> sph_hankel_1(T1 v, T2 x); template - std::complex<``__sf_result``> sph_hankel_1(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> sph_hankel_1(T1 v, T2 x, const ``__Policy``&); template - std::complex<``__sf_result``> sph_hankel_2(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> sph_hankel_2(T1 v, T2 x); + + template + BOOST_MATH_GPU_ENABLED std::complex<``__sf_result``> sph_hankel_2(T1 v, T2 x, const ``__Policy``&); + #else // When using cuda we use namespace cuda::std:: instead of std:: + + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> sph_hankel_1(T1 v, T2 x); + template - std::complex<``__sf_result``> sph_hankel_2(T1 v, T2 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> sph_hankel_1(T1 v, T2 x, const ``__Policy``&); + + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> sph_hankel_2(T1 v, T2 x); + template + BOOST_MATH_GPU_ENABLED cuda::std::complex<``__sf_result``> sph_hankel_2(T1 v, T2 x, const ``__Policy``&); + + #endif [h4 Description] diff --git a/doc/sf/hermite.qbk b/doc/sf/hermite.qbk index c88aadc344..965aa80928 100644 --- a/doc/sf/hermite.qbk +++ b/doc/sf/hermite.qbk @@ -9,13 +9,13 @@ namespace boost{ namespace math{ template - ``__sf_result`` hermite(unsigned n, T x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` hermite(unsigned n, T x); template - ``__sf_result`` hermite(unsigned n, T x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` hermite(unsigned n, T x, const ``__Policy``&); template - ``__sf_result`` hermite_next(unsigned n, T1 x, T2 Hn, T3 Hnm1); + BOOST_MATH_GPU_ENABLED ``__sf_result`` hermite_next(unsigned n, T1 x, T2 Hn, T3 Hnm1); }} // namespaces @@ -26,10 +26,10 @@ note than when there is a single template argument the result is the same type as that argument or `double` if the template argument is an integer type. 
template - ``__sf_result`` hermite(unsigned n, T x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` hermite(unsigned n, T x); template - ``__sf_result`` hermite(unsigned n, T x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` hermite(unsigned n, T x, const ``__Policy``&); Returns the value of the Hermite Polynomial of order /n/ at point /x/: @@ -43,7 +43,7 @@ Hermite Polynomials: [graph hermite] template - ``__sf_result`` hermite_next(unsigned n, T1 x, T2 Hn, T3 Hnm1); + BOOST_MATH_GPU_ENABLED ``__sf_result`` hermite_next(unsigned n, T1 x, T2 Hn, T3 Hnm1); Implements the three term recurrence relation for the Hermite polynomials, this function can be used to create a sequence of diff --git a/doc/sf/ibeta.qbk b/doc/sf/ibeta.qbk index b4a20f9286..5227b2d342 100644 --- a/doc/sf/ibeta.qbk +++ b/doc/sf/ibeta.qbk @@ -9,28 +9,28 @@ namespace boost{ namespace math{ template - ``__sf_result`` ibeta(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta(T1 a, T2 b, T3 x); template - ``__sf_result`` ibeta(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta(T1 a, T2 b, T3 x, const ``__Policy``&); template - ``__sf_result`` ibetac(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac(T1 a, T2 b, T3 x); template - ``__sf_result`` ibetac(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac(T1 a, T2 b, T3 x, const ``__Policy``&); template - ``__sf_result`` beta(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` beta(T1 a, T2 b, T3 x); template - ``__sf_result`` beta(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` beta(T1 a, T2 b, T3 x, const ``__Policy``&); template - ``__sf_result`` betac(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` betac(T1 a, T2 b, T3 x); template - ``__sf_result`` betac(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` betac(T1 a, T2 b, T3 x, const ``__Policy``&); }} // namespaces @@ -57,10 +57,10 @@ when T1, T2 and T3 are different types. 
[optional_policy] template - ``__sf_result`` ibeta(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta(T1 a, T2 b, T3 x); template - ``__sf_result`` ibeta(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta(T1 a, T2 b, T3 x, const ``__Policy``&); Returns the normalised incomplete beta function of a, b and x: @@ -69,30 +69,30 @@ Returns the normalised incomplete beta function of a, b and x: [graph ibeta] template - ``__sf_result`` ibetac(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac(T1 a, T2 b, T3 x); template - ``__sf_result`` ibetac(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac(T1 a, T2 b, T3 x, const ``__Policy``&); Returns the normalised complement of the incomplete beta function of a, b and x: [equation ibeta4] template - ``__sf_result`` beta(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` beta(T1 a, T2 b, T3 x); template - ``__sf_result`` beta(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` beta(T1 a, T2 b, T3 x, const ``__Policy``&); Returns the full (non-normalised) incomplete beta function of a, b and x: [equation ibeta1] template - ``__sf_result`` betac(T1 a, T2 b, T3 x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` betac(T1 a, T2 b, T3 x); template - ``__sf_result`` betac(T1 a, T2 b, T3 x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` betac(T1 a, T2 b, T3 x, const ``__Policy``&); Returns the full (non-normalised) complement of the incomplete beta function of a, b and x: diff --git a/doc/sf/ibeta_inv.qbk b/doc/sf/ibeta_inv.qbk index 83c2b00086..60049db465 100644 --- a/doc/sf/ibeta_inv.qbk +++ b/doc/sf/ibeta_inv.qbk @@ -7,52 +7,52 @@ namespace boost{ namespace math{ template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p); template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, const ``__Policy``&); template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py); template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py, const ``__Policy``&); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, const ``__Policy``&); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py, const ``__Policy``&); template - ``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p); template - ``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p, const ``__Policy``&); template - ``__sf_result`` ibetac_inva(T1 b, T2 x, T3 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inva(T1 b, T2 x, T3 q); template - ``__sf_result`` ibetac_inva(T1 b, T2 x, T3 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED 
``__sf_result`` ibetac_inva(T1 b, T2 x, T3 q, const ``__Policy``&); template - ``__sf_result`` ibeta_invb(T1 a, T2 x, T3 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_invb(T1 a, T2 x, T3 p); template - ``__sf_result`` ibeta_invb(T1 a, T2 x, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_invb(T1 a, T2 x, T3 p, const ``__Policy``&); template - ``__sf_result`` ibetac_invb(T1 a, T2 x, T3 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_invb(T1 a, T2 x, T3 q); template - ``__sf_result`` ibetac_invb(T1 a, T2 x, T3 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_invb(T1 a, T2 x, T3 q, const ``__Policy``&); }} // namespaces @@ -81,16 +81,16 @@ The return type of these functions is computed using the __arg_promotion_rules when called with arguments T1...TN of different types. template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p); template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, const ``__Policy``&); template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py); template - ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibeta_inv(T1 a, T2 b, T3 p, T4* py, const ``__Policy``&); Returns a value /x/ such that: `p = ibeta(a, b, x);` and sets `*py = 1 - x` when the `py` parameter is provided and is non-null. @@ -104,16 +104,16 @@ Requires: /a,b > 0/ and /0 <= p <= 1/. [optional_policy] template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, const ``__Policy``&); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py); template - ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_inv(T1 a, T2 b, T3 q, T4* py, const ``__Policy``&); Returns a value /x/ such that: `q = ibetac(a, b, x);` and sets `*py = 1 - x` when the `py` parameter is provided and is non-null. @@ -127,10 +127,10 @@ Requires: /a,b > 0/ and /0 <= q <= 1/. [optional_policy] template - ``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p); template - ``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibeta_inva(T1 b, T2 x, T3 p, const ``__Policy``&); Returns a value /a/ such that: `p = ibeta(a, b, x);` @@ -139,10 +139,10 @@ Requires: /b > 0/, /0 < x < 1/ and /0 <= p <= 1/. [optional_policy] template - ``__sf_result`` ibetac_inva(T1 b, T2 x, T3 p); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibetac_inva(T1 b, T2 x, T3 p); template - ``__sf_result`` ibetac_inva(T1 b, T2 x, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibetac_inva(T1 b, T2 x, T3 p, const ``__Policy``&); Returns a value /a/ such that: `q = ibetac(a, b, x);` @@ -151,10 +151,10 @@ Requires: /b > 0/, /0 < x < 1/ and /0 <= q <= 1/. 
[optional_policy] template - ``__sf_result`` ibeta_invb(T1 b, T2 x, T3 p); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibeta_invb(T1 b, T2 x, T3 p); template - ``__sf_result`` ibeta_invb(T1 b, T2 x, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibeta_invb(T1 b, T2 x, T3 p, const ``__Policy``&); Returns a value /b/ such that: `p = ibeta(a, b, x);` @@ -163,10 +163,10 @@ Requires: /a > 0/, /0 < x < 1/ and /0 <= p <= 1/. [optional_policy] template - ``__sf_result`` ibetac_invb(T1 b, T2 x, T3 p); + BOOST_MATH_GPU_ENABLED``__sf_result`` ibetac_invb(T1 b, T2 x, T3 p); template - ``__sf_result`` ibetac_invb(T1 b, T2 x, T3 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` ibetac_invb(T1 b, T2 x, T3 p, const ``__Policy``&); Returns a value /b/ such that: `q = ibetac(a, b, x);` diff --git a/doc/sf/igamma.qbk b/doc/sf/igamma.qbk index ca354ad10f..4675928e63 100644 --- a/doc/sf/igamma.qbk +++ b/doc/sf/igamma.qbk @@ -9,28 +9,28 @@ namespace boost{ namespace math{ template - ``__sf_result`` gamma_p(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p(T1 a, T2 z); template - ``__sf_result`` gamma_p(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p(T1 a, T2 z, const ``__Policy``&); template - ``__sf_result`` gamma_q(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q(T1 a, T2 z); template - ``__sf_result`` gamma_q(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q(T1 a, T2 z, const ``__Policy``&); template - ``__sf_result`` tgamma_lower(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_lower(T1 a, T2 z); template - ``__sf_result`` tgamma_lower(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_lower(T1 a, T2 z, const ``__Policy``&); template - ``__sf_result`` tgamma(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T1 a, T2 z); template - ``__sf_result`` tgamma(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T1 a, T2 z, const ``__Policy``&); }} // namespaces @@ -53,10 +53,10 @@ The return type of these functions is computed using the __arg_promotion_rules when T1 and T2 are different types, otherwise the return type is simply T1. 
template - ``__sf_result`` gamma_p(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p(T1 a, T2 z); template - ``__sf_result`` gamma_p(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p(T1 a, T2 z, const ``__Policy``&); Returns the normalised lower incomplete gamma function of a and z: @@ -67,10 +67,10 @@ This function changes rapidly from 0 to 1 around the point z == a: [graph gamma_p] template - ``__sf_result`` gamma_q(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q(T1 a, T2 z); template - ``__sf_result`` gamma_q(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q(T1 a, T2 z, const ``__Policy``&); Returns the normalised upper incomplete gamma function of a and z: @@ -81,20 +81,20 @@ This function changes rapidly from 1 to 0 around the point z == a: [graph gamma_q] template - ``__sf_result`` tgamma_lower(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_lower(T1 a, T2 z); template - ``__sf_result`` tgamma_lower(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma_lower(T1 a, T2 z, const ``__Policy``&); Returns the full (non-normalised) lower incomplete gamma function of a and z: [equation igamma2] template - ``__sf_result`` tgamma(T1 a, T2 z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T1 a, T2 z); template - ``__sf_result`` tgamma(T1 a, T2 z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T1 a, T2 z, const ``__Policy``&); Returns the full (non-normalised) upper incomplete gamma function of a and z: diff --git a/doc/sf/igamma_inv.qbk b/doc/sf/igamma_inv.qbk index 593c92141b..55fe76e6e8 100644 --- a/doc/sf/igamma_inv.qbk +++ b/doc/sf/igamma_inv.qbk @@ -9,28 +9,28 @@ namespace boost{ namespace math{ template - ``__sf_result`` gamma_q_inv(T1 a, T2 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inv(T1 a, T2 q); template - ``__sf_result`` gamma_q_inv(T1 a, T2 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inv(T1 a, T2 q, const ``__Policy``&); template - ``__sf_result`` gamma_p_inv(T1 a, T2 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inv(T1 a, T2 p); template - ``__sf_result`` gamma_p_inv(T1 a, T2 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inv(T1 a, T2 p, const ``__Policy``&); template - ``__sf_result`` gamma_q_inva(T1 x, T2 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inva(T1 x, T2 q); template - ``__sf_result`` gamma_q_inva(T1 x, T2 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inva(T1 x, T2 q, const ``__Policy``&); template - ``__sf_result`` gamma_p_inva(T1 x, T2 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inva(T1 x, T2 p); template - ``__sf_result`` gamma_p_inva(T1 x, T2 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inva(T1 x, T2 p, const ``__Policy``&); }} // namespaces @@ -58,40 +58,40 @@ These are implemented here as `gamma_p_inva` and `gamma_q_inva`.] template - ``__sf_result`` gamma_q_inv(T1 a, T2 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inv(T1 a, T2 q); template - ``__sf_result`` gamma_q_inv(T1 a, T2 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inv(T1 a, T2 q, const ``__Policy``&); Returns a value x such that: `q = gamma_q(a, x);` Requires: /a > 0/ and /1 >= p,q >= 0/. 
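[Note, not part of the patch: the inverses round-trip with their forward functions; a minimal host-side check:]

    #include <boost/math/special_functions/gamma.hpp>
    #include <iostream>

    int main()
    {
        const double a = 2.5;
        const double q = 0.25;
        const double x = boost::math::gamma_q_inv(a, q);     // x such that gamma_q(a, x) == q
        std::cout << boost::math::gamma_q(a, x) - q << '\n'; // ~0, up to rounding
    }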
template - ``__sf_result`` gamma_p_inv(T1 a, T2 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inv(T1 a, T2 p); template - ``__sf_result`` gamma_p_inv(T1 a, T2 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inv(T1 a, T2 p, const ``__Policy``&); Returns a value x such that: `p = gamma_p(a, x);` Requires: /a > 0/ and /1 >= p,q >= 0/. template - ``__sf_result`` gamma_q_inva(T1 x, T2 q); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inva(T1 x, T2 q); template - ``__sf_result`` gamma_q_inva(T1 x, T2 q, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_q_inva(T1 x, T2 q, const ``__Policy``&); Returns a value a such that: `q = gamma_q(a, x);` Requires: /x > 0/ and /1 >= p,q >= 0/. template - ``__sf_result`` gamma_p_inva(T1 x, T2 p); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inva(T1 x, T2 p); template - ``__sf_result`` gamma_p_inva(T1 x, T2 p, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` gamma_p_inva(T1 x, T2 p, const ``__Policy``&); Returns a value a such that: `p = gamma_p(a, x);` diff --git a/doc/sf/lgamma.qbk b/doc/sf/lgamma.qbk index 5ea1a4e091..544485c7ca 100644 --- a/doc/sf/lgamma.qbk +++ b/doc/sf/lgamma.qbk @@ -9,16 +9,16 @@ namespace boost{ namespace math{ template - ``__sf_result`` lgamma(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma(T z); template - ``__sf_result`` lgamma(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma(T z, const ``__Policy``&); template - ``__sf_result`` lgamma(T z, int* sign); + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma(T z, int* sign); template - ``__sf_result`` lgamma(T z, int* sign, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` lgamma(T z, int* sign, const ``__Policy``&); }} // namespaces diff --git a/doc/sf/pow.qbk b/doc/sf/pow.qbk index db021978e2..ecb762d711 100644 --- a/doc/sf/pow.qbk +++ b/doc/sf/pow.qbk @@ -10,10 +10,10 @@ power of a run-time base. namespace boost { namespace math { template - constexpr ``__sf_result`` pow(T base); + BOOST_MATH_GPU_ENABLED constexpr ``__sf_result`` pow(T base); template - constexpr ``__sf_result`` pow(T base, const Policy& policy); + BOOST_MATH_GPU_ENABLED constexpr ``__sf_result`` pow(T base, const Policy& policy); }} diff --git a/doc/sf/sinc.qbk b/doc/sf/sinc.qbk index b345c08cd7..a6042a7171 100644 --- a/doc/sf/sinc.qbk +++ b/doc/sf/sinc.qbk @@ -43,16 +43,16 @@ and [@http://mathworld.wolfram.com/Octonion.html octonions]. `` template - ``__sf_result`` sinc_pi(const T x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sinc_pi(const T x); template - ``__sf_result`` sinc_pi(const T x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sinc_pi(const T x, const ``__Policy``&); template class U> - U sinc_pi(const U x); + BOOST_MATH_GPU_ENABLED U sinc_pi(const U x); template class U, class ``__Policy``> - U sinc_pi(const U x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED U sinc_pi(const U x, const ``__Policy``&); Computes [link math_toolkit.sinc.sinc_overview @@ -78,10 +78,10 @@ to ensure accuracy. 
`` template - ``__sf_result`` sinhc_pi(const T x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sinhc_pi(const T x); template - ``__sf_result`` sinhc_pi(const T x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` sinhc_pi(const T x, const ``__Policy``&); template class U> U sinhc_pi(const U x); diff --git a/doc/sf/tgamma.qbk b/doc/sf/tgamma.qbk index 7eb535ec3a..23baad2cb8 100644 --- a/doc/sf/tgamma.qbk +++ b/doc/sf/tgamma.qbk @@ -9,26 +9,26 @@ namespace boost{ namespace math{ template - ``__sf_result`` tgamma(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T z); template - ``__sf_result`` tgamma(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T z, const ``__Policy``&); template - ``__sf_result`` tgamma1pm1(T dz); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma1pm1(T dz); template - ``__sf_result`` tgamma1pm1(T dz, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma1pm1(T dz, const ``__Policy``&); }} // namespaces [h4 Description] template - ``__sf_result`` tgamma(T z); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T z); template - ``__sf_result`` tgamma(T z, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma(T z, const ``__Policy``&); Returns the "true gamma" (hence name tgamma) of value z: @@ -42,10 +42,10 @@ The return type of this function is computed using the __arg_promotion_rules: the result is `double` when T is an integer type, and T otherwise. template - ``__sf_result`` tgamma1pm1(T dz); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma1pm1(T dz); template - ``__sf_result`` tgamma1pm1(T dz, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` tgamma1pm1(T dz, const ``__Policy``&); Returns `tgamma(dz + 1) - 1`. Internally the implementation does not make use of the addition and subtraction implied by the definition, leading to diff --git a/doc/sf/trigamma.qbk b/doc/sf/trigamma.qbk index 137a148d83..a358c85713 100644 --- a/doc/sf/trigamma.qbk +++ b/doc/sf/trigamma.qbk @@ -9,10 +9,10 @@ namespace boost{ namespace math{ template - ``__sf_result`` trigamma(T x); + BOOST_MATH_GPU_ENABLED ``__sf_result`` trigamma(T x); template - ``__sf_result`` trigamma(T x, const ``__Policy``&); + BOOST_MATH_GPU_ENABLED ``__sf_result`` trigamma(T x, const ``__Policy``&); }} // namespaces diff --git a/example/Jamfile.v2 b/example/Jamfile.v2 index 34e4a5a8c8..2a6ad6947d 100644 --- a/example/Jamfile.v2 +++ b/example/Jamfile.v2 @@ -7,10 +7,13 @@ # bring in the rules for testing import testing ; -import ../../config/checks/config : requires ; +import-search /boost/config/checks ; +import config : requires ; project : requirements + /boost/math//boost_math + /boost/multiprecision//boost_multiprecision gcc:-Wno-missing-braces darwin:-Wno-missing-braces acc:+W2068,2461,2236,4070 @@ -36,7 +39,6 @@ project clang:-Wno-unknown-pragmas clang:-Wno-language-extension-token - ../../.. 
      <include>../include_private
      <exception-handling>off:<source>../test//no_eh
      [ requires cxx11_noexcept cxx11_rvalue_references sfinae_expr cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_hdr_initializer_list cxx11_hdr_chrono cxx11_thread_local cxx11_constexpr cxx11_nullptr cxx11_numeric_limits cxx11_decltype cxx11_hdr_array cxx11_hdr_atomic cxx11_hdr_type_traits cxx11_allocator cxx11_explicit_conversion_operators ]
@@ -53,7 +55,7 @@ test-suite examples :
    [ run binomial_example_nag.cpp ]
    [ run binomial_quiz_example.cpp : : : <exception-handling>off:<build>no ]
    [ run binomial_sample_sizes.cpp ]
-   [ run brent_minimise_example.cpp : : : [ requires cxx11_hdr_tuple ] ]
+   [ run brent_minimise_example.cpp /boost/test//included : : : [ requires cxx11_hdr_tuple ] ]
 
    [ run c_error_policy_example.cpp ]
    [ run chi_square_std_dev_test.cpp : : : <exception-handling>off:<build>no ]
 
@@ -93,11 +95,11 @@ test-suite examples :
    [ run nonfinite_num_facet.cpp ]
    [ run nonfinite_facet_simple.cpp ]
-   #[ run nonfinite_num_facet_serialization.cpp ../../serialization/build//boost_serialization : : : <exception-handling>off:<build>no <toolset>gcc-mingw:<link>static ]
+   #[ run nonfinite_num_facet_serialization.cpp /boost/serialization//boost_serialization : : : <exception-handling>off:<build>no <toolset>gcc-mingw:<link>static ]
    #[ # run lexical_cast_native.cpp ]  # Expected to fail on some (but not all) platforms.
    [ run lexical_cast_nonfinite_facets.cpp ]
    [ run nonfinite_loopback_ok.cpp ]
-   #[ run nonfinite_serialization_archives.cpp ../../serialization/build//boost_serialization : : : <exception-handling>off:<build>no <toolset>gcc-mingw:<link>static ]
+   #[ run nonfinite_serialization_archives.cpp /boost/serialization//boost_serialization : : : <exception-handling>off:<build>no <toolset>gcc-mingw:<link>static ]
    [ run nonfinite_facet_sstream.cpp ]
 
    [ run constants_eg1.cpp ]
@@ -113,7 +115,7 @@ test-suite examples :
    [ run policy_eg_6.cpp ]
    [ run policy_eg_7.cpp ]
    [ run policy_eg_8.cpp ]
-   [ run policy_eg_9.cpp ]
+   [ run policy_eg_9.cpp /boost/format//boost_format ]
    [ run policy_ref_snip1.cpp : : : <exception-handling>off:<build>no ]
    [ run policy_ref_snip10.cpp ]
    [ run policy_ref_snip11.cpp ]
diff --git a/include/boost/math/ccmath/copysign.hpp b/include/boost/math/ccmath/copysign.hpp
index 90a58102b1..e117e57faa 100644
--- a/include/boost/math/ccmath/copysign.hpp
+++ b/include/boost/math/ccmath/copysign.hpp
@@ -54,7 +54,7 @@ constexpr auto copysign(T1 mag, T2 sgn) noexcept
 {
     if (BOOST_MATH_IS_CONSTANT_EVALUATED(mag))
     {
-        using promoted_type = boost::math::tools::promote_args_2_t<T1, T2>;
+        using promoted_type = boost::math::tools::promote_args_t<T1, T2>;
         return boost::math::ccmath::copysign(static_cast<promoted_type>(mag), static_cast<promoted_type>(sgn));
     }
     else
diff --git a/include/boost/math/ccmath/fdim.hpp b/include/boost/math/ccmath/fdim.hpp
index cdcbc223c6..d6b4e25cec 100644
--- a/include/boost/math/ccmath/fdim.hpp
+++ b/include/boost/math/ccmath/fdim.hpp
@@ -66,7 +66,7 @@ constexpr auto fdim(T1 x, T2 y) noexcept
 {
     if (BOOST_MATH_IS_CONSTANT_EVALUATED(x))
     {
-        using promoted_type = boost::math::tools::promote_args_2_t<T1, T2>;
+        using promoted_type = boost::math::tools::promote_args_t<T1, T2>;
         return boost::math::ccmath::fdim(promoted_type(x), promoted_type(y));
     }
     else
diff --git a/include/boost/math/ccmath/fmax.hpp b/include/boost/math/ccmath/fmax.hpp
index 237355275b..8a0d17d03e 100644
--- a/include/boost/math/ccmath/fmax.hpp
+++ b/include/boost/math/ccmath/fmax.hpp
@@ -62,7 +62,7 @@ constexpr auto fmax(T1 x, T2 y) noexcept
 {
     if (BOOST_MATH_IS_CONSTANT_EVALUATED(x))
     {
-        using promoted_type = boost::math::tools::promote_args_2_t<T1, T2>;
+        using promoted_type = boost::math::tools::promote_args_t<T1, T2>;
         return boost::math::ccmath::fmax(static_cast<promoted_type>(x), static_cast<promoted_type>(y));
     }
     else
diff --git a/include/boost/math/ccmath/fmin.hpp b/include/boost/math/ccmath/fmin.hpp
index 1c113e0d6e..29885b69c8 100644
--- a/include/boost/math/ccmath/fmin.hpp
+++ b/include/boost/math/ccmath/fmin.hpp
@@ -62,7 +62,7 @@ constexpr auto fmin(T1 x, T2 y) noexcept
 {
     if (BOOST_MATH_IS_CONSTANT_EVALUATED(x))
     {
-        using promoted_type = boost::math::tools::promote_args_2_t<T1, T2>;
+        using promoted_type = boost::math::tools::promote_args_t<T1, T2>;
         return boost::math::ccmath::fmin(static_cast<promoted_type>(x), static_cast<promoted_type>(y));
     }
     else
diff --git a/include/boost/math/ccmath/hypot.hpp b/include/boost/math/ccmath/hypot.hpp
index 4e0e245b4e..34dd5ab2c0 100644
--- a/include/boost/math/ccmath/hypot.hpp
+++ b/include/boost/math/ccmath/hypot.hpp
@@ -89,7 +89,7 @@ constexpr auto hypot(T1 x, T2 y) noexcept
 {
     if(BOOST_MATH_IS_CONSTANT_EVALUATED(x))
     {
-        using promoted_type = boost::math::tools::promote_args_2_t<T1, T2>;
+        using promoted_type = boost::math::tools::promote_args_t<T1, T2>;
         return boost::math::ccmath::hypot(static_cast<promoted_type>(x), static_cast<promoted_type>(y));
     }
     else
diff --git a/include/boost/math/ccmath/isinf.hpp b/include/boost/math/ccmath/isinf.hpp
index f1e00e34f5..ecf0d620ab 100644
--- a/include/boost/math/ccmath/isinf.hpp
+++ b/include/boost/math/ccmath/isinf.hpp
@@ -22,7 +22,14 @@ constexpr bool isinf BOOST_MATH_PREVENT_MACRO_SUBSTITUTION(T x) noexcept
 {
     if constexpr (std::numeric_limits<T>::is_signed)
     {
+#if defined(__clang_major__) && __clang_major__ >= 6
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wtautological-constant-compare"
+#endif
         return x == std::numeric_limits<T>::infinity() || -x == std::numeric_limits<T>::infinity();
+#if defined(__clang_major__) && __clang_major__ >= 6
+#pragma clang diagnostic pop
+#endif
     }
     else
     {
@@ -32,7 +39,7 @@ constexpr bool isinf BOOST_MATH_PREVENT_MACRO_SUBSTITUTION(T x) noexcept
     else
     {
         using boost::math::isinf;
-        
+
         if constexpr (!std::is_integral_v<T>)
         {
             return (isinf)(x);
diff --git a/include/boost/math/concepts/std_real_concept.hpp b/include/boost/math/concepts/std_real_concept.hpp
index f77935c7fb..43f562efe1 100644
--- a/include/boost/math/concepts/std_real_concept.hpp
+++ b/include/boost/math/concepts/std_real_concept.hpp
@@ -229,19 +229,22 @@ inline boost::math::concepts::std_real_concept (nextafter)(boost::math::concepts
 { return (boost::math::nextafter)(a, b); }
 //
 // C++11 ism's
-// Note that these must not actually call the std:: versions as that precludes using this
-// header to test in C++03 mode, call the Boost versions instead:
+// Now that we only support C++11 and later, we can allow use of these:
 //
 inline boost::math::concepts::std_real_concept asinh(boost::math::concepts::std_real_concept a)
-{ return boost::math::asinh(a.value(), boost::math::policies::make_policy(boost::math::policies::overflow_error<boost::math::policies::ignore_error>())); }
+{ return std::asinh(a.value()); }
 inline boost::math::concepts::std_real_concept acosh(boost::math::concepts::std_real_concept a)
-{ return boost::math::acosh(a.value(), boost::math::policies::make_policy(boost::math::policies::overflow_error<boost::math::policies::ignore_error>())); }
+{ return std::acosh(a.value()); }
 inline boost::math::concepts::std_real_concept atanh(boost::math::concepts::std_real_concept a)
-{ return boost::math::atanh(a.value(), boost::math::policies::make_policy(boost::math::policies::overflow_error<boost::math::policies::ignore_error>())); }
+{ return std::atanh(a.value()); }
 inline bool (isfinite)(boost::math::concepts::std_real_concept a)
 { return (boost::math::isfinite)(a.value()); }
+inline boost::math::concepts::std_real_concept log2(boost::math::concepts::std_real_concept a)
+{ return std::log2(a.value()); }
+inline int ilogb(boost::math::concepts::std_real_concept
a) +{ return std::ilogb(a.value()); } } // namespace std diff --git a/include/boost/math/constants/constants.hpp b/include/boost/math/constants/constants.hpp index 4bf81c61d1..df702bf899 100644 --- a/include/boost/math/constants/constants.hpp +++ b/include/boost/math/constants/constants.hpp @@ -1,5 +1,6 @@ // Copyright John Maddock 2005-2006, 2011. // Copyright Paul A. Bristow 2006-2011. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -8,6 +9,9 @@ #define BOOST_MATH_CONSTANTS_CONSTANTS_INCLUDED #include + +#ifndef BOOST_MATH_HAS_NVRTC + #include #include #include @@ -209,11 +213,11 @@ namespace boost{ namespace math constant_initializer::get_from_string >::force_instantiate();\ return get_from_string();\ }\ - static inline constexpr T get(const std::integral_constant) noexcept\ + BOOST_MATH_GPU_ENABLED static inline constexpr T get(const std::integral_constant) noexcept\ { return BOOST_MATH_JOIN(x, F); }\ - static inline constexpr T get(const std::integral_constant&) noexcept\ + BOOST_MATH_GPU_ENABLED static inline constexpr T get(const std::integral_constant&) noexcept\ { return x; }\ - static inline constexpr T get(const std::integral_constant&) noexcept\ + BOOST_MATH_GPU_ENABLED static inline constexpr T get(const std::integral_constant&) noexcept\ { return BOOST_MATH_JOIN(x, L); }\ BOOST_MATH_FLOAT128_CONSTANT_OVERLOAD(x) \ template static inline const T& get(const std::integral_constant&)\ @@ -231,9 +235,9 @@ namespace boost{ namespace math \ \ /* The actual forwarding function: */ \ - template inline constexpr typename detail::constant_return::type name(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T) BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(Policy)) BOOST_MATH_NOEXCEPT(T)\ + template BOOST_MATH_GPU_ENABLED inline constexpr typename detail::constant_return::type name(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T) BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(Policy)) BOOST_MATH_NOEXCEPT(T)\ { return detail:: BOOST_MATH_JOIN(constant_, name)::get(typename construction_traits::type()); }\ - template inline constexpr typename detail::constant_return::type name(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) BOOST_MATH_NOEXCEPT(T)\ + template BOOST_MATH_GPU_ENABLED inline constexpr typename detail::constant_return::type name(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) BOOST_MATH_NOEXCEPT(T)\ { return name >(); }\ \ \ @@ -243,6 +247,16 @@ namespace boost{ namespace math namespace long_double_constants{ static constexpr long double name = BOOST_MATH_JOIN(x, L); }\ namespace constants{ +#else // NVRTC simplified macro definition + +#define BOOST_DEFINE_MATH_CONSTANT(name, value, str_value) template BOOST_MATH_GPU_ENABLED constexpr T name() noexcept { return static_cast(value); } + +namespace boost { +namespace math { +namespace constants { + +#endif + BOOST_DEFINE_MATH_CONSTANT(half, 5.000000000000000000000000000000000000e-01, "5.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e-01") BOOST_DEFINE_MATH_CONSTANT(third, 3.333333333333333333333333333333333333e-01, "3.33333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333e-01") BOOST_DEFINE_MATH_CONSTANT(twothirds, 6.666666666666666666666666666666666666e-01, 
"6.66666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666667e-01") @@ -318,17 +332,15 @@ namespace boost{ namespace math BOOST_DEFINE_MATH_CONSTANT(one_div_pi, 0.3183098861837906715377675267450287240689192, "0.31830988618379067153776752674502872406891929148091289749533468811779359526845307018022760553250617191214568545351") BOOST_DEFINE_MATH_CONSTANT(two_div_root_pi, 1.12837916709551257389615890312154517168810125, "1.12837916709551257389615890312154517168810125865799771368817144342128493688298682897348732040421472688605669581272") -#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900) BOOST_DEFINE_MATH_CONSTANT(first_feigenbaum, 4.66920160910299067185320382046620161725818557747576863274, "4.6692016091029906718532038204662016172581855774757686327456513430041343302113147371386897440239480138171") BOOST_DEFINE_MATH_CONSTANT(plastic, 1.324717957244746025960908854478097340734404056901733364534, "1.32471795724474602596090885447809734073440405690173336453401505030282785124554759405469934798178728032991") BOOST_DEFINE_MATH_CONSTANT(gauss, 0.834626841674073186281429732799046808993993013490347002449, "0.83462684167407318628142973279904680899399301349034700244982737010368199270952641186969116035127532412906785") BOOST_DEFINE_MATH_CONSTANT(dottie, 0.739085133215160641655312087673873404013411758900757464965, "0.739085133215160641655312087673873404013411758900757464965680635773284654883547594599376106931766531849801246") BOOST_DEFINE_MATH_CONSTANT(reciprocal_fibonacci, 3.35988566624317755317201130291892717968890513, "3.35988566624317755317201130291892717968890513373196848649555381532513031899668338361541621645679008729704") BOOST_DEFINE_MATH_CONSTANT(laplace_limit, 0.662743419349181580974742097109252907056233549115022417, "0.66274341934918158097474209710925290705623354911502241752039253499097185308651127724965480259895818168") -#endif template -inline constexpr T tau() { return two_pi(); } +BOOST_MATH_GPU_ENABLED inline constexpr T tau() { return two_pi(); } } // namespace constants } // namespace math @@ -338,7 +350,11 @@ inline constexpr T tau() { return two_pi(); } // We deliberately include this *after* all the declarations above, // that way the calculation routines can call on other constants above: // +// NVRTC will not have a type that needs runtime calculation +// +#ifndef BOOST_MATH_HAS_NVRTC #include +#endif #endif // BOOST_MATH_CONSTANTS_CONSTANTS_INCLUDED diff --git a/include/boost/math/differentiation/autodiff.hpp b/include/boost/math/differentiation/autodiff.hpp index 7a57aa2f92..b8880f24de 100644 --- a/include/boost/math/differentiation/autodiff.hpp +++ b/include/boost/math/differentiation/autodiff.hpp @@ -39,7 +39,7 @@ namespace detail { template struct promote_args_n { - using type = typename tools::promote_args_2::type>::type; + using type = typename tools::promote_args::type>::type; }; template @@ -2002,9 +2002,9 @@ using autodiff_root_type = typename autodiff_fvar_type::root_ty // See boost/math/tools/promotion.hpp template -struct promote_args_2, +struct promote_args, detail::autodiff_fvar_type> { - using type = detail::autodiff_fvar_type::type, + using type = detail::autodiff_fvar_type::type, #ifndef BOOST_MATH_NO_CXX14_CONSTEXPR (std::max)(Order0, Order1)>; #else @@ -2018,13 +2018,13 @@ struct promote_args> { }; template -struct promote_args_2, RealType1> { - using type = detail::autodiff_fvar_type::type, Order0>; +struct promote_args, RealType1> { + using type = detail::autodiff_fvar_type::type, Order0>; }; 
template -struct promote_args_2> { - using type = detail::autodiff_fvar_type::type, Order1>; +struct promote_args> { + using type = detail::autodiff_fvar_type::type, Order1>; }; template diff --git a/include/boost/math/distributions.hpp b/include/boost/math/distributions.hpp index 64da99415e..0834db870a 100644 --- a/include/boost/math/distributions.hpp +++ b/include/boost/math/distributions.hpp @@ -24,15 +24,18 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include +#include #include #include #include @@ -42,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/include/boost/math/distributions/arcsine.hpp b/include/boost/math/distributions/arcsine.hpp index a8fcbbc05f..899bfb1b2b 100644 --- a/include/boost/math/distributions/arcsine.hpp +++ b/include/boost/math/distributions/arcsine.hpp @@ -2,6 +2,7 @@ // Copyright John Maddock 2014. // Copyright Paul A. Bristow 2014. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -29,13 +30,21 @@ #ifndef BOOST_MATH_DIST_ARCSINE_HPP #define BOOST_MATH_DIST_ARCSINE_HPP -#include -#include +#include +#include #include // complements. #include // error checks. #include - #include // isnan. +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include +#include +#include +#include // For std::domain_error. +#endif #if defined (BOOST_MSVC) # pragma warning(push) @@ -43,9 +52,6 @@ // in domain_error_imp in error_handling. #endif -#include -#include // For std::domain_error. - namespace boost { namespace math @@ -55,7 +61,7 @@ namespace boost // Common error checking routines for arcsine distribution functions: // Duplicating for x_min and x_max provides specific error messages. template - inline bool check_x_min(const char* function, const RealType& x, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_x_min(const char* function, const RealType& x, RealType* result, const Policy& pol) { if (!(boost::math::isfinite)(x)) { @@ -68,7 +74,7 @@ namespace boost } // bool check_x_min template - inline bool check_x_max(const char* function, const RealType& x, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_x_max(const char* function, const RealType& x, RealType* result, const Policy& pol) { if (!(boost::math::isfinite)(x)) { @@ -82,14 +88,14 @@ namespace boost template - inline bool check_x_minmax(const char* function, const RealType& x_min, const RealType& x_max, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_x_minmax(const char* function, const RealType& x_min, const RealType& x_max, RealType* result, const Policy& pol) { // Check x_min < x_max if (x_min >= x_max) { - std::string msg = "x_max argument is %1%, but must be > x_min"; + constexpr auto msg = "x_max argument is %1%, but must be > x_min"; *result = policies::raise_domain_error( function, - msg.c_str(), x_max, pol); + msg, x_max, pol); // "x_max argument is %1%, but must be > x_min !", x_max, pol); // "x_max argument is %1%, but must be > x_min %2!", x_max, x_min, pol); would be better. 
// But would require replication of all helpers functions in /policies/error_handling.hpp for two values, @@ -100,7 +106,7 @@ namespace boost } // bool check_x_minmax template - inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) { if ((p < 0) || (p > 1) || !(boost::math::isfinite)(p)) { @@ -113,7 +119,7 @@ namespace boost } // bool check_prob template - inline bool check_x(const char* function, const RealType& x_min, const RealType& x_max, const RealType& x, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_x(const char* function, const RealType& x_min, const RealType& x_max, const RealType& x, RealType* result, const Policy& pol) { // Check x finite and x_min < x < x_max. if (!(boost::math::isfinite)(x)) { @@ -137,7 +143,7 @@ namespace boost } // bool check_x template - inline bool check_dist(const char* function, const RealType& x_min, const RealType& x_max, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& x_min, const RealType& x_max, RealType* result, const Policy& pol) { // Check both x_min and x_max finite, and x_min < x_max. return check_x_min(function, x_min, result, pol) && check_x_max(function, x_max, result, pol) @@ -145,14 +151,14 @@ namespace boost } // bool check_dist template - inline bool check_dist_and_x(const char* function, const RealType& x_min, const RealType& x_max, RealType x, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_x(const char* function, const RealType& x_min, const RealType& x_max, RealType x, RealType* result, const Policy& pol) { return check_dist(function, x_min, x_max, result, pol) && arcsine_detail::check_x(function, x_min, x_max, x, result, pol); } // bool check_dist_and_x template - inline bool check_dist_and_prob(const char* function, const RealType& x_min, const RealType& x_max, RealType p, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, const RealType& x_min, const RealType& x_max, RealType p, RealType* result, const Policy& pol) { return check_dist(function, x_min, x_max, result, pol) && check_prob(function, p, result, pol); @@ -167,7 +173,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - arcsine_distribution(RealType x_min = 0, RealType x_max = 1) : m_x_min(x_min), m_x_max(x_max) + BOOST_MATH_GPU_ENABLED arcsine_distribution(RealType x_min = 0, RealType x_max = 1) : m_x_min(x_min), m_x_max(x_max) { // Default beta (alpha = beta = 0.5) is standard arcsine with x_min = 0, x_max = 1. // Generalized to allow x_min and x_max to be specified. RealType result; @@ -178,11 +184,11 @@ namespace boost &result, Policy()); } // arcsine_distribution constructor. // Accessor functions: - RealType x_min() const + BOOST_MATH_GPU_ENABLED RealType x_min() const { return m_x_min; } - RealType x_max() const + BOOST_MATH_GPU_ENABLED RealType x_max() const { return m_x_max; } @@ -203,21 +209,21 @@ namespace boost #endif template - inline const std::pair range(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const arcsine_distribution& dist) { // Range of permissible values for random variable x. 
using boost::math::tools::max_value; - return std::pair(static_cast(dist.x_min()), static_cast(dist.x_max())); + return boost::math::pair(static_cast(dist.x_min()), static_cast(dist.x_max())); } template - inline const std::pair support(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const arcsine_distribution& dist) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - return std::pair(static_cast(dist.x_min()), static_cast(dist.x_max())); + return boost::math::pair(static_cast(dist.x_min()), static_cast(dist.x_max())); } template - inline RealType mean(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const arcsine_distribution& dist) { // Mean of arcsine distribution . RealType result; RealType x_min = dist.x_min(); @@ -236,7 +242,7 @@ namespace boost } // mean template - inline RealType variance(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const arcsine_distribution& dist) { // Variance of standard arcsine distribution = (1-0)/8 = 0.125. RealType result; RealType x_min = dist.x_min(); @@ -254,7 +260,7 @@ namespace boost } // variance template - inline RealType mode(const arcsine_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline RealType mode(const arcsine_distribution& /* dist */) { //There are always [*two] values for the mode, at ['x_min] and at ['x_max], default 0 and 1, // so instead we raise the exception domain_error. return policies::raise_domain_error( @@ -265,7 +271,7 @@ namespace boost } // mode template - inline RealType median(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType median(const arcsine_distribution& dist) { // Median of arcsine distribution (a + b) / 2 == mean. RealType x_min = dist.x_min(); RealType x_max = dist.x_max(); @@ -283,7 +289,7 @@ namespace boost } template - inline RealType skewness(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const arcsine_distribution& dist) { RealType result; RealType x_min = dist.x_min(); @@ -302,7 +308,7 @@ namespace boost } // skewness template - inline RealType kurtosis_excess(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const arcsine_distribution& dist) { RealType result; RealType x_min = dist.x_min(); @@ -322,7 +328,7 @@ namespace boost } // kurtosis_excess template - inline RealType kurtosis(const arcsine_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const arcsine_distribution& dist) { RealType result; RealType x_min = dist.x_min(); @@ -342,12 +348,12 @@ namespace boost } // kurtosis template - inline RealType pdf(const arcsine_distribution& dist, const RealType& xx) + BOOST_MATH_GPU_ENABLED inline RealType pdf(const arcsine_distribution& dist, const RealType& xx) { // Probability Density/Mass Function arcsine. BOOST_FPU_EXCEPTION_GUARD BOOST_MATH_STD_USING // For ADL of std functions. - static const char* function = "boost::math::pdf(arcsine_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::pdf(arcsine_distribution<%1%> const&, %1%)"; RealType lo = dist.x_min(); RealType hi = dist.x_max(); @@ -368,11 +374,11 @@ namespace boost } // pdf template - inline RealType cdf(const arcsine_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const arcsine_distribution& dist, const RealType& x) { // Cumulative Distribution Function arcsine. 
BOOST_MATH_STD_USING // For ADL of std functions. - static const char* function = "boost::math::cdf(arcsine_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::cdf(arcsine_distribution<%1%> const&, %1%)"; RealType x_min = dist.x_min(); RealType x_max = dist.x_max(); @@ -401,10 +407,10 @@ namespace boost } // arcsine cdf template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function arcsine. BOOST_MATH_STD_USING // For ADL of std functions. - static const char* function = "boost::math::cdf(arcsine_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::cdf(arcsine_distribution<%1%> const&, %1%)"; RealType x = c.param; arcsine_distribution const& dist = c.dist; @@ -437,7 +443,7 @@ namespace boost } // arcsine ccdf template - inline RealType quantile(const arcsine_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const arcsine_distribution& dist, const RealType& p) { // Quantile or Percent Point arcsine function or // Inverse Cumulative probability distribution function CDF. @@ -451,7 +457,7 @@ namespace boost using boost::math::constants::half_pi; - static const char* function = "boost::math::quantile(arcsine_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::quantile(arcsine_distribution<%1%> const&, %1%)"; RealType result = 0; // of argument checks: RealType x_min = dist.x_min(); @@ -481,7 +487,7 @@ namespace boost } // quantile template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { // Complement Quantile or Percent Point arcsine function. // Return the number of expected x for a given @@ -489,7 +495,7 @@ namespace boost BOOST_MATH_STD_USING // For ADL of std functions. using boost::math::constants::half_pi; - static const char* function = "boost::math::quantile(arcsine_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::quantile(arcsine_distribution<%1%> const&, %1%)"; // Error checks: RealType q = c.param; diff --git a/include/boost/math/distributions/bernoulli.hpp b/include/boost/math/distributions/bernoulli.hpp index cce209a6fb..f1c693f7f0 100644 --- a/include/boost/math/distributions/bernoulli.hpp +++ b/include/boost/math/distributions/bernoulli.hpp @@ -2,6 +2,7 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2007. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -27,13 +28,19 @@ #ifndef BOOST_MATH_SPECIAL_BERNOULLI_HPP #define BOOST_MATH_SPECIAL_BERNOULLI_HPP -#include #include +#include +#include #include // complements #include // error checks #include // isnan. 
+#include +#include +#ifndef BOOST_MATH_HAS_NVRTC #include +#include +#endif namespace boost { @@ -43,7 +50,7 @@ namespace boost { // Common error checking routines for bernoulli distribution functions: template - inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& /* pol */) + BOOST_MATH_GPU_ENABLED inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& /* pol */) { if(!(boost::math::isfinite)(p) || (p < 0) || (p > 1)) { @@ -55,23 +62,23 @@ namespace boost return true; } template - inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */, const std::true_type&) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */, const boost::math::true_type&) { return check_success_fraction(function, p, result, Policy()); } template - inline bool check_dist(const char* , const RealType& , RealType* , const Policy& /* pol */, const std::false_type&) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* , const RealType& , RealType* , const Policy& /* pol */, const boost::math::false_type&) { return true; } template - inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& /* pol */) { return check_dist(function, p, result, Policy(), typename policies::constructor_error_check::type()); } template - inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol) { if(check_dist(function, p, result, Policy(), typename policies::method_error_check::type()) == false) { @@ -87,7 +94,7 @@ namespace boost return true; } template - inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& /* pol */) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& /* pol */) { if((check_dist(function, p, result, Policy(), typename policies::method_error_check::type()) && detail::check_probability(function, prob, result, Policy())) == false) { @@ -105,7 +112,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - bernoulli_distribution(RealType p = 0.5) : m_p(p) + BOOST_MATH_GPU_ENABLED bernoulli_distribution(RealType p = 0.5) : m_p(p) { // Default probability = half suits 'fair' coin tossing // where probability of heads == probability of tails. RealType result; // of checks. @@ -115,7 +122,7 @@ namespace boost &result, Policy()); } // bernoulli_distribution constructor. - RealType success_fraction() const + BOOST_MATH_GPU_ENABLED RealType success_fraction() const { // Probability. return m_p; } @@ -132,21 +139,21 @@ namespace boost #endif template - inline const std::pair range(const bernoulli_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const bernoulli_distribution& /* dist */) { // Range of permissible values for random variable k = {0, 1}. 
using boost::math::tools::max_value; - return std::pair(static_cast(0), static_cast(1)); + return boost::math::pair(static_cast(0), static_cast(1)); } template - inline const std::pair support(const bernoulli_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const bernoulli_distribution& /* dist */) { // Range of supported values for random variable k = {0, 1}. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - return std::pair(static_cast(0), static_cast(1)); + return boost::math::pair(static_cast(0), static_cast(1)); } template - inline RealType mean(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const bernoulli_distribution& dist) { // Mean of bernoulli distribution = p (n = 1). return dist.success_fraction(); } // mean @@ -159,13 +166,13 @@ namespace boost //} // median template - inline RealType variance(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const bernoulli_distribution& dist) { // Variance of bernoulli distribution =p * q. return dist.success_fraction() * (1 - dist.success_fraction()); } // variance template - RealType pdf(const bernoulli_distribution& dist, const RealType& k) + BOOST_MATH_GPU_ENABLED RealType pdf(const bernoulli_distribution& dist, const RealType& k) { // Probability Density/Mass Function. BOOST_FPU_EXCEPTION_GUARD // Error check: @@ -190,7 +197,7 @@ namespace boost } // pdf template - inline RealType cdf(const bernoulli_distribution& dist, const RealType& k) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const bernoulli_distribution& dist, const RealType& k) { // Cumulative Distribution Function Bernoulli. RealType p = dist.success_fraction(); // Error check: @@ -214,7 +221,7 @@ namespace boost } // bernoulli cdf template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function bernoulli. RealType const& k = c.param; bernoulli_distribution const& dist = c.dist; @@ -240,7 +247,7 @@ namespace boost } // bernoulli cdf complement template - inline RealType quantile(const bernoulli_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const bernoulli_distribution& dist, const RealType& p) { // Quantile or Percent Point Bernoulli function. // Return the number of expected successes k either 0 or 1. // for a given probability p. @@ -265,7 +272,7 @@ namespace boost } // quantile template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { // Quantile or Percent Point bernoulli function. // Return the number of expected successes k for a given // complement of the probability q. @@ -294,13 +301,13 @@ namespace boost } // quantile complemented. template - inline RealType mode(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const bernoulli_distribution& dist) { return static_cast((dist.success_fraction() <= 0.5) ? 0 : 1); // p = 0.5 can be 0 or 1 } template - inline RealType skewness(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const bernoulli_distribution& dist) { BOOST_MATH_STD_USING; // Aid ADL for sqrt. 
RealType p = dist.success_fraction(); @@ -308,7 +315,7 @@ namespace boost } template - inline RealType kurtosis_excess(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const bernoulli_distribution& dist) { RealType p = dist.success_fraction(); // Note Wolfram says this is kurtosis in text, but gamma2 is the kurtosis excess, @@ -319,7 +326,7 @@ namespace boost } template - inline RealType kurtosis(const bernoulli_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const bernoulli_distribution& dist) { RealType p = dist.success_fraction(); return 1 / (1 - p) + 1/p -6 + 3; diff --git a/include/boost/math/distributions/beta.hpp b/include/boost/math/distributions/beta.hpp index 6c17ffa1a2..fef991a870 100644 --- a/include/boost/math/distributions/beta.hpp +++ b/include/boost/math/distributions/beta.hpp @@ -25,12 +25,15 @@ #ifndef BOOST_MATH_DIST_BETA_HPP #define BOOST_MATH_DIST_BETA_HPP +#include +#include #include #include // for beta. #include // complements. #include // error checks #include // isnan. #include // for root finding. +#include #if defined (BOOST_MSVC) # pragma warning(push) @@ -38,8 +41,6 @@ // in domain_error_imp in error_handling #endif -#include - namespace boost { namespace math @@ -48,7 +49,7 @@ namespace boost { // Common error checking routines for beta distribution functions: template - inline bool check_alpha(const char* function, const RealType& alpha, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_alpha(const char* function, const RealType& alpha, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(alpha) || (alpha <= 0)) { @@ -61,7 +62,7 @@ namespace boost } // bool check_alpha template - inline bool check_beta(const char* function, const RealType& beta, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_beta(const char* function, const RealType& beta, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(beta) || (beta <= 0)) { @@ -74,7 +75,7 @@ namespace boost } // bool check_beta template - inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol) { if((p < 0) || (p > 1) || !(boost::math::isfinite)(p)) { @@ -87,7 +88,7 @@ namespace boost } // bool check_prob template - inline bool check_x(const char* function, const RealType& x, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_x(const char* function, const RealType& x, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(x) || (x < 0) || (x > 1)) { @@ -100,28 +101,28 @@ namespace boost } // bool check_x template - inline bool check_dist(const char* function, const RealType& alpha, const RealType& beta, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& alpha, const RealType& beta, RealType* result, const Policy& pol) { // Check both alpha and beta. 
return check_alpha(function, alpha, result, pol) && check_beta(function, beta, result, pol); } // bool check_dist template - inline bool check_dist_and_x(const char* function, const RealType& alpha, const RealType& beta, RealType x, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_x(const char* function, const RealType& alpha, const RealType& beta, RealType x, RealType* result, const Policy& pol) { return check_dist(function, alpha, beta, result, pol) && beta_detail::check_x(function, x, result, pol); } // bool check_dist_and_x template - inline bool check_dist_and_prob(const char* function, const RealType& alpha, const RealType& beta, RealType p, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, const RealType& alpha, const RealType& beta, RealType p, RealType* result, const Policy& pol) { return check_dist(function, alpha, beta, result, pol) && check_prob(function, p, result, pol); } // bool check_dist_and_prob template - inline bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(mean) || (mean <= 0)) { @@ -133,7 +134,7 @@ namespace boost return true; } // bool check_mean template - inline bool check_variance(const char* function, const RealType& variance, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_variance(const char* function, const RealType& variance, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(variance) || (variance <= 0)) { @@ -157,7 +158,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - beta_distribution(RealType l_alpha = 1, RealType l_beta = 1) : m_alpha(l_alpha), m_beta(l_beta) + BOOST_MATH_GPU_ENABLED beta_distribution(RealType l_alpha = 1, RealType l_beta = 1) : m_alpha(l_alpha), m_beta(l_beta) { RealType result; beta_detail::check_dist( @@ -167,11 +168,11 @@ namespace boost &result, Policy()); } // beta_distribution constructor. // Accessor functions: - RealType alpha() const + BOOST_MATH_GPU_ENABLED RealType alpha() const { return m_alpha; } - RealType beta() const + BOOST_MATH_GPU_ENABLED RealType beta() const { // . return m_beta; } @@ -183,11 +184,11 @@ namespace boost // http://www.itl.nist.gov/div898/handbook/eda/section3/eda366h.htm // http://www.epi.ucdavis.edu/diagnostictests/betabuster.html - static RealType find_alpha( + BOOST_MATH_GPU_ENABLED static RealType find_alpha( RealType mean, // Expected value of mean. RealType variance) // Expected value of variance. { - static const char* function = "boost::math::beta_distribution<%1%>::find_alpha"; + constexpr auto function = "boost::math::beta_distribution<%1%>::find_alpha"; RealType result = 0; // of error checks. if(false == ( @@ -201,11 +202,11 @@ namespace boost return mean * (( (mean * (1 - mean)) / variance)- 1); } // RealType find_alpha - static RealType find_beta( + BOOST_MATH_GPU_ENABLED static RealType find_beta( RealType mean, // Expected value of mean. RealType variance) // Expected value of variance. { - static const char* function = "boost::math::beta_distribution<%1%>::find_beta"; + constexpr auto function = "boost::math::beta_distribution<%1%>::find_beta"; RealType result = 0; // of error checks. 
if(false == ( @@ -223,12 +224,12 @@ namespace boost // Estimate alpha & beta from either alpha or beta, and x and probability. // Uses for these parameter estimators are unclear. - static RealType find_alpha( + BOOST_MATH_GPU_ENABLED static RealType find_alpha( RealType beta, // from beta. RealType x, // x. RealType probability) // cdf { - static const char* function = "boost::math::beta_distribution<%1%>::find_alpha"; + constexpr auto function = "boost::math::beta_distribution<%1%>::find_alpha"; RealType result = 0; // of error checks. if(false == ( @@ -245,13 +246,13 @@ namespace boost return static_cast(ibeta_inva(beta, x, probability, Policy())); } // RealType find_alpha(beta, a, probability) - static RealType find_beta( + BOOST_MATH_GPU_ENABLED static RealType find_beta( // ibeta_invb(T b, T x, T p); (alpha, x, cdf,) RealType alpha, // alpha. RealType x, // probability x. RealType probability) // probability cdf. { - static const char* function = "boost::math::beta_distribution<%1%>::find_beta"; + constexpr auto function = "boost::math::beta_distribution<%1%>::find_beta"; RealType result = 0; // of error checks. if(false == ( @@ -281,27 +282,27 @@ namespace boost #endif template - inline const std::pair range(const beta_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const beta_distribution& /* dist */) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(static_cast(0), static_cast(1)); + return boost::math::pair(static_cast(0), static_cast(1)); } template - inline const std::pair support(const beta_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const beta_distribution& /* dist */) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - return std::pair(static_cast(0), static_cast(1)); + return boost::math::pair(static_cast(0), static_cast(1)); } template - inline RealType mean(const beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const beta_distribution& dist) { // Mean of beta distribution = np. return dist.alpha() / (dist.alpha() + dist.beta()); } // mean template - inline RealType variance(const beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const beta_distribution& dist) { // Variance of beta distribution = np(1-p). RealType a = dist.alpha(); RealType b = dist.beta(); @@ -309,9 +310,9 @@ namespace boost } // variance template - inline RealType mode(const beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const beta_distribution& dist) { - static const char* function = "boost::math::mode(beta_distribution<%1%> const&)"; + constexpr auto function = "boost::math::mode(beta_distribution<%1%> const&)"; RealType result; if ((dist.alpha() <= 1)) @@ -343,7 +344,7 @@ namespace boost //But WILL be provided by the derived accessor as quantile(0.5). template - inline RealType skewness(const beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const beta_distribution& dist) { BOOST_MATH_STD_USING // ADL of std functions. 
RealType a = dist.alpha(); @@ -352,7 +353,7 @@ namespace boost } // skewness template - inline RealType kurtosis_excess(const beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const beta_distribution& dist) { RealType a = dist.alpha(); RealType b = dist.beta(); @@ -363,17 +364,17 @@ namespace boost } // kurtosis_excess template - inline RealType kurtosis(const beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const beta_distribution& dist) { return 3 + kurtosis_excess(dist); } // kurtosis template - inline RealType pdf(const beta_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType pdf(const beta_distribution& dist, const RealType& x) { // Probability Density/Mass Function. BOOST_FPU_EXCEPTION_GUARD - static const char* function = "boost::math::pdf(beta_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::pdf(beta_distribution<%1%> const&, %1%)"; BOOST_MATH_STD_USING // for ADL of std functions @@ -428,11 +429,11 @@ namespace boost } // pdf template - inline RealType cdf(const beta_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const beta_distribution& dist, const RealType& x) { // Cumulative Distribution Function beta. BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(beta_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::cdf(beta_distribution<%1%> const&, %1%)"; RealType a = dist.alpha(); RealType b = dist.beta(); @@ -459,12 +460,12 @@ namespace boost } // beta cdf template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function beta. BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(beta_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::cdf(beta_distribution<%1%> const&, %1%)"; RealType const& x = c.param; beta_distribution const& dist = c.dist; @@ -495,7 +496,7 @@ namespace boost } // beta cdf template - inline RealType quantile(const beta_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const beta_distribution& dist, const RealType& p) { // Quantile or Percent Point beta function or // Inverse Cumulative probability distribution function CDF. // Return x (0 <= x <= 1), @@ -505,7 +506,7 @@ namespace boost // will be less than or equal to that value // is whatever probability you supplied as an argument. - static const char* function = "boost::math::quantile(beta_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::quantile(beta_distribution<%1%> const&, %1%)"; RealType result = 0; // of argument checks: RealType a = dist.alpha(); @@ -530,12 +531,12 @@ namespace boost } // quantile template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { // Complement Quantile or Percent Point beta function . // Return the number of expected x for a given // complement of the probability q. 
- static const char* function = "boost::math::quantile(beta_distribution<%1%> const&, %1%)"; + constexpr auto function = "boost::math::quantile(beta_distribution<%1%> const&, %1%)"; // // Error checks: diff --git a/include/boost/math/distributions/binomial.hpp b/include/boost/math/distributions/binomial.hpp index cf7451104b..b17893e422 100644 --- a/include/boost/math/distributions/binomial.hpp +++ b/include/boost/math/distributions/binomial.hpp @@ -79,6 +79,8 @@ #ifndef BOOST_MATH_SPECIAL_BINOMIAL_HPP #define BOOST_MATH_SPECIAL_BINOMIAL_HPP +#include +#include #include #include // for incomplete beta. #include // complements @@ -100,7 +102,7 @@ namespace boost namespace binomial_detail{ // common error checking routines for binomial distribution functions: template - inline bool check_N(const char* function, const RealType& N, RealType* result, const Policy& pol) + BOOST_MATH_CUDA_ENABLED inline bool check_N(const char* function, const RealType& N, RealType* result, const Policy& pol) { if((N < 0) || !(boost::math::isfinite)(N)) { @@ -112,7 +114,7 @@ namespace boost return true; } template - inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol) + BOOST_MATH_CUDA_ENABLED inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol) { if((p < 0) || (p > 1) || !(boost::math::isfinite)(p)) { @@ -124,7 +126,7 @@ namespace boost return true; } template - inline bool check_dist(const char* function, const RealType& N, const RealType& p, RealType* result, const Policy& pol) + BOOST_MATH_CUDA_ENABLED inline bool check_dist(const char* function, const RealType& N, const RealType& p, RealType* result, const Policy& pol) { return check_success_fraction( function, p, result, pol) @@ -132,7 +134,7 @@ namespace boost function, N, result, pol); } template - inline bool check_dist_and_k(const char* function, const RealType& N, const RealType& p, RealType k, RealType* result, const Policy& pol) + BOOST_MATH_CUDA_ENABLED inline bool check_dist_and_k(const char* function, const RealType& N, const RealType& p, RealType k, RealType* result, const Policy& pol) { if(check_dist(function, N, p, result, pol) == false) return false; @@ -153,7 +155,7 @@ namespace boost return true; } template - inline bool check_dist_and_prob(const char* function, const RealType& N, RealType p, RealType prob, RealType* result, const Policy& pol) + BOOST_MATH_CUDA_ENABLED inline bool check_dist_and_prob(const char* function, const RealType& N, RealType p, RealType prob, RealType* result, const Policy& pol) { if((check_dist(function, N, p, result, pol) && detail::check_probability(function, prob, result, pol)) == false) return false; @@ -161,7 +163,7 @@ namespace boost } template - T inverse_binomial_cornish_fisher(T n, T sf, T p, T q, const Policy& pol) + BOOST_MATH_CUDA_ENABLED T inverse_binomial_cornish_fisher(T n, T sf, T p, T q, const Policy& pol) { BOOST_MATH_STD_USING // mean: @@ -196,7 +198,7 @@ namespace boost } template - RealType quantile_imp(const binomial_distribution& dist, const RealType& p, const RealType& q, bool comp) + BOOST_MATH_CUDA_ENABLED RealType quantile_imp(const binomial_distribution& dist, const RealType& p, const RealType& q, bool comp) { // Quantile or Percent Point Binomial function. // Return the number of expected successes k, // for a given probability p. @@ -290,11 +292,11 @@ namespace boost &r, Policy()); } // binomial_distribution constructor. 
- RealType success_fraction() const + BOOST_MATH_CUDA_ENABLED RealType success_fraction() const { // Probability. return m_p; } - RealType trials() const + BOOST_MATH_CUDA_ENABLED RealType trials() const { // Total number of trials. return m_n; } @@ -310,13 +312,13 @@ namespace boost // these functions are used // to obtain confidence intervals for the success fraction. // - static RealType find_lower_bound_on_p( + BOOST_MATH_CUDA_ENABLED static RealType find_lower_bound_on_p( RealType trials, RealType successes, RealType probability, interval_type t = clopper_pearson_exact_interval) { - static const char* function = "boost::math::binomial_distribution<%1%>::find_lower_bound_on_p"; + BOOST_MATH_STATIC const char* function = "boost::math::binomial_distribution<%1%>::find_lower_bound_on_p"; // Error checks: RealType result = 0; if(false == binomial_detail::check_dist_and_k( @@ -335,13 +337,13 @@ namespace boost return (t == clopper_pearson_exact_interval) ? ibeta_inv(successes, trials - successes + 1, probability, static_cast(nullptr), Policy()) : ibeta_inv(successes + 0.5f, trials - successes + 0.5f, probability, static_cast(nullptr), Policy()); } - static RealType find_upper_bound_on_p( + BOOST_MATH_CUDA_ENABLED static RealType find_upper_bound_on_p( RealType trials, RealType successes, RealType probability, interval_type t = clopper_pearson_exact_interval) { - static const char* function = "boost::math::binomial_distribution<%1%>::find_upper_bound_on_p"; + BOOST_MATH_STATIC const char* function = "boost::math::binomial_distribution<%1%>::find_upper_bound_on_p"; // Error checks: RealType result = 0; if(false == binomial_detail::check_dist_and_k( @@ -363,12 +365,12 @@ namespace boost // or // "How many trials can I have to be P% sure of seeing fewer than k events?" // - static RealType find_minimum_number_of_trials( + BOOST_MATH_CUDA_ENABLED static RealType find_minimum_number_of_trials( RealType k, // number of events RealType p, // success fraction RealType alpha) // risk level { - static const char* function = "boost::math::binomial_distribution<%1%>::find_minimum_number_of_trials"; + BOOST_MATH_STATIC const char* function = "boost::math::binomial_distribution<%1%>::find_minimum_number_of_trials"; // Error checks: RealType result = 0; if(false == binomial_detail::check_dist_and_k( @@ -382,12 +384,12 @@ namespace boost return result + k; } - static RealType find_maximum_number_of_trials( + BOOST_MATH_CUDA_ENABLED static RealType find_maximum_number_of_trials( RealType k, // number of events RealType p, // success fraction RealType alpha) // risk level { - static const char* function = "boost::math::binomial_distribution<%1%>::find_maximum_number_of_trials"; + BOOST_MATH_STATIC const char* function = "boost::math::binomial_distribution<%1%>::find_maximum_number_of_trials"; // Error checks: RealType result = 0; if(false == binomial_detail::check_dist_and_k( @@ -419,33 +421,33 @@ namespace boost #endif template - const std::pair range(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED const boost::math::pair range(const binomial_distribution& dist) { // Range of permissible values for random variable k. using boost::math::tools::max_value; - return std::pair(static_cast(0), dist.trials()); + return boost::math::pair(static_cast(0), dist.trials()); } template - const std::pair support(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED const boost::math::pair support(const binomial_distribution& dist) { // Range of supported values for random variable k. 
// This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - return std::pair(static_cast(0), dist.trials()); + return boost::math::pair(static_cast(0), dist.trials()); } template - inline RealType mean(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED inline RealType mean(const binomial_distribution& dist) { // Mean of Binomial distribution = np. return dist.trials() * dist.success_fraction(); } // mean template - inline RealType variance(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED inline RealType variance(const binomial_distribution& dist) { // Variance of Binomial distribution = np(1-p). return dist.trials() * dist.success_fraction() * (1 - dist.success_fraction()); } // variance template - RealType pdf(const binomial_distribution& dist, const RealType& k) + BOOST_MATH_CUDA_ENABLED RealType pdf(const binomial_distribution& dist, const RealType& k) { // Probability Density/Mass Function. BOOST_FPU_EXCEPTION_GUARD @@ -501,7 +503,7 @@ namespace boost } // pdf template - inline RealType cdf(const binomial_distribution& dist, const RealType& k) + BOOST_MATH_CUDA_ENABLED inline RealType cdf(const binomial_distribution& dist, const RealType& k) { // Cumulative Distribution Function Binomial. // The random variate k is the number of successes in n trials. // k argument may be integral, signed, or unsigned, or floating point. @@ -573,7 +575,7 @@ namespace boost } // binomial cdf template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_CUDA_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function Binomial. // The random variate k is the number of successes in n trials. // k argument may be integral, signed, or unsigned, or floating point. @@ -650,19 +652,19 @@ namespace boost } // binomial cdf template - inline RealType quantile(const binomial_distribution& dist, const RealType& p) + BOOST_MATH_CUDA_ENABLED inline RealType quantile(const binomial_distribution& dist, const RealType& p) { return binomial_detail::quantile_imp(dist, p, RealType(1-p), false); } // quantile template - RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_CUDA_ENABLED RealType quantile(const complemented2_type, RealType>& c) { return binomial_detail::quantile_imp(c.dist, RealType(1-c.param), c.param, true); } // quantile template - inline RealType mode(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED inline RealType mode(const binomial_distribution& dist) { BOOST_MATH_STD_USING // ADL of std functions. RealType p = dist.success_fraction(); @@ -671,7 +673,7 @@ namespace boost } template - inline RealType median(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED inline RealType median(const binomial_distribution& dist) { // Bounds for the median of the negative binomial distribution // VAN DE VEN R. ; WEBER N. C. ; // Univ. Sydney, school mathematics statistics, Sydney N.S.W. 2006, AUSTRALIE @@ -689,7 +691,7 @@ namespace boost } template - inline RealType skewness(const binomial_distribution& dist) + BOOST_MATH_CUDA_ENABLED inline RealType skewness(const binomial_distribution& dist) { BOOST_MATH_STD_USING // ADL of std functions. 
      RealType p = dist.success_fraction();
@@ -698,7 +700,7 @@ namespace boost
   }

   template <class RealType, class Policy>
-  inline RealType kurtosis(const binomial_distribution<RealType, Policy>& dist)
+  BOOST_MATH_CUDA_ENABLED inline RealType kurtosis(const binomial_distribution<RealType, Policy>& dist)
   {
      RealType p = dist.success_fraction();
      RealType n = dist.trials();
@@ -706,7 +708,7 @@ namespace boost
   }

   template <class RealType, class Policy>
-  inline RealType kurtosis_excess(const binomial_distribution<RealType, Policy>& dist)
+  BOOST_MATH_CUDA_ENABLED inline RealType kurtosis_excess(const binomial_distribution<RealType, Policy>& dist)
   {
      RealType p = dist.success_fraction();
      RealType q = 1 - p;
diff --git a/include/boost/math/distributions/cauchy.hpp b/include/boost/math/distributions/cauchy.hpp
index d914cca77e..3a5af69e43 100644
--- a/include/boost/math/distributions/cauchy.hpp
+++ b/include/boost/math/distributions/cauchy.hpp
@@ -1,5 +1,6 @@
 // Copyright John Maddock 2006, 2007.
 // Copyright Paul A. Bristow 2007.
+// Copyright Matt Borland 2024.

 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
@@ -13,12 +14,21 @@
 #pragma warning(disable : 4127) // conditional expression is constant
 #endif

-#include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
+#include
+#include
+
+#ifndef BOOST_MATH_HAS_NVRTC
+#include
 #include
 #include
+#endif

 namespace boost{ namespace math
 {
@@ -30,7 +40,7 @@ namespace detail
 {

 template <class RealType, class Policy>
-RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealType& x, bool complement)
+BOOST_MATH_GPU_ENABLED RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealType& x, bool complement)
 {
    //
    // This calculates the cdf of the Cauchy distribution and/or its complement.
    //
@@ -47,14 +57,14 @@ RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealTy
    //
    // Substituting into the above we get:
    //
-   // CDF = -atan(1/x) ; x < 0
+   // CDF = -atan(1/x)/pi ; x < 0
    //
    // So the procedure is to calculate the cdf for -fabs(x)
    // using the above formula, and then subtract from 1 when required
    // to get the result.
    //
    BOOST_MATH_STD_USING // for ADL of std functions
-   static const char* function = "boost::math::cdf(cauchy<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(cauchy<%1%>&, %1%)";
    RealType result = 0;
    RealType location = dist.location();
    RealType scale = dist.scale();
@@ -66,14 +76,25 @@ RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealTy
    {
      return result;
    }
-   if(std::numeric_limits<RealType>::has_infinity && x == std::numeric_limits<RealType>::infinity())
+   #ifdef BOOST_MATH_HAS_GPU_SUPPORT
+   if(x > tools::max_value<RealType>())
+   {
+      return static_cast<RealType>((complement) ? 0 : 1);
+   }
+   if(x < -tools::max_value<RealType>())
+   {
+      return static_cast<RealType>((complement) ? 1 : 0);
+   }
+   #else
+   if(boost::math::numeric_limits<RealType>::has_infinity && x == boost::math::numeric_limits<RealType>::infinity())
    { // cdf +infinity is unity.
      return static_cast<RealType>((complement) ? 0 : 1);
    }
-   if(std::numeric_limits<RealType>::has_infinity && x == -std::numeric_limits<RealType>::infinity())
+   if(boost::math::numeric_limits<RealType>::has_infinity && x == -boost::math::numeric_limits<RealType>::infinity())
    { // cdf -infinity is zero.
      return static_cast<RealType>((complement) ? 1 : 0);
    }
+   #endif
    if(false == detail::check_x(function, x, &result, Policy()))
    { // Catches x == NaN
      return result;
    }
@@ -88,20 +109,19 @@ RealType cdf_imp(const cauchy_distribution<RealType, Policy>& dist, const RealTy
 } // cdf
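The corrected comment above (the /pi was previously missing) can be sanity-checked at x = -1, where the standard Cauchy CDF is exactly 1/4: -atan(1/x)/pi = -atan(-1)/pi = (pi/4)/pi = 1/4. A minimal host-side check:

    #include <boost/math/distributions/cauchy.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::cauchy_distribution<double> dist(0.0, 1.0);
        assert(std::abs(cdf(dist, -1.0) - 0.25) < 1e-15); // CDF(-1) = 1/4
    }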
 template <class RealType, class Policy>
-RealType quantile_imp(
+BOOST_MATH_GPU_ENABLED RealType quantile_imp(
       const cauchy_distribution<RealType, Policy>& dist,
-      const RealType& p,
+      RealType p,
       bool complement)
 {
    // This routine implements the quantile for the Cauchy distribution,
    // the value p may be the probability, or its complement if complement=true.
    //
-   // The procedure first performs argument reduction on p to avoid error
-   // when calculating the tangent, then calculates the distance from the
+   // The procedure calculates the distance from the
    // mid-point of the distribution. This is either added or subtracted
    // from the location parameter depending on whether `complement` is true.
    //
-   static const char* function = "boost::math::quantile(cauchy<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(cauchy<%1%>&, %1%)";
    BOOST_MATH_STD_USING // for ADL of std functions

    RealType result = 0;
@@ -129,16 +149,15 @@ RealType quantile_imp(
      return (complement ? 1 : -1) * policies::raise_overflow_error<RealType>(function, 0, Policy());
    }

-   RealType P = p - floor(p);   // argument reduction of p:
-   if(P > 0.5)
+   if(p > 0.5)
    {
-     P = P - 1;
+     p = p - 1;
    }
-   if(P == 0.5)   // special case:
+   if(p == 0.5)   // special case:
    {
      return location;
    }
-   result = -scale / tan(constants::pi<RealType>() * P);
+   result = -scale / tan(constants::pi<RealType>() * p);
    return complement ? RealType(location - result) : RealType(location + result);
 } // quantile
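The old argument reduction (P = p - floor(p)) was redundant: p has already been validated as a probability in [0, 1] by the checks above, and the 0 and 1 endpoints are handled separately as overflows. Shifting p down by 1 when p > 0.5 keeps the tangent argument inside (-pi/2, pi/2), where tan is well conditioned, and the resulting quantile is location - scale/tan(pi * p). Quick check: for any Cauchy, Q(0.75) = location + scale:

    #include <boost/math/distributions/cauchy.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::cauchy_distribution<double> dist(2.0, 3.0);
        assert(std::abs(quantile(dist, 0.75) - 5.0) < 1e-12); // 2 + 3
    }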
@@ -151,20 +170,20 @@ class cauchy_distribution
    typedef RealType value_type;
    typedef Policy policy_type;

-   cauchy_distribution(RealType l_location = 0, RealType l_scale = 1)
+   BOOST_MATH_GPU_ENABLED cauchy_distribution(RealType l_location = 0, RealType l_scale = 1)
      : m_a(l_location), m_hg(l_scale)
    {
-    static const char* function = "boost::math::cauchy_distribution<%1%>::cauchy_distribution";
+    constexpr auto function = "boost::math::cauchy_distribution<%1%>::cauchy_distribution";
     RealType result;
     detail::check_location(function, l_location, &result, Policy());
     detail::check_scale(function, l_scale, &result, Policy());
   } // cauchy_distribution

-   RealType location()const
+   BOOST_MATH_GPU_ENABLED RealType location()const
   {
      return m_a;
   }
-   RealType scale()const
+   BOOST_MATH_GPU_ENABLED RealType scale()const
   {
      return m_hg;
   }
@@ -184,48 +203,48 @@ cauchy_distribution(RealType,RealType)->cauchy_distribution

 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> range(const cauchy_distribution<RealType, Policy>&)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const cauchy_distribution<RealType, Policy>&)
 { // Range of permissible values for random variable x.
-  if (std::numeric_limits<RealType>::has_infinity)
+  BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
  {
-    return std::pair<RealType, RealType>(-std::numeric_limits<RealType>::infinity(), std::numeric_limits<RealType>::infinity()); // - to + infinity.
+    return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
  }
  else
  { // Can only use max_value.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
  }
 }

 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> support(const cauchy_distribution<RealType, Policy>& )
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const cauchy_distribution<RealType, Policy>& )
 { // Range of supported values for random variable x.
   // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
-  if (std::numeric_limits<RealType>::has_infinity)
+  BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
  {
-    return std::pair<RealType, RealType>(-std::numeric_limits<RealType>::infinity(), std::numeric_limits<RealType>::infinity()); // - to + infinity.
+    return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
  }
  else
  { // Can only use max_value.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(-tools::max_value<RealType>(), max_value<RealType>()); // - to + max.
+    return boost::math::pair<RealType, RealType>(-tools::max_value<RealType>(), max_value<RealType>()); // - to + max.
  }
 }

 template <class RealType, class Policy>
-inline RealType pdf(const cauchy_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const cauchy_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
-   static const char* function = "boost::math::pdf(cauchy<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(cauchy<%1%>&, %1%)";
    RealType result = 0;
    RealType location = dist.location();
    RealType scale = dist.scale();
-   if(false == detail::check_scale("boost::math::pdf(cauchy<%1%>&, %1%)", scale, &result, Policy()))
+   if(false == detail::check_scale(function, scale, &result, Policy()))
    {
      return result;
    }
-   if(false == detail::check_location("boost::math::pdf(cauchy<%1%>&, %1%)", location, &result, Policy()))
+   if(false == detail::check_location(function, location, &result, Policy()))
    {
      return result;
    }
@@ -234,7 +253,7 @@ inline RealType pdf(const cauchy_distribution<RealType, Policy>& dist, const Rea
      return 0; // pdf + and - infinity is zero.
    }
    // These produce MSVC 4127 warnings, so the above used instead.
-   //if(std::numeric_limits<RealType>::has_infinity && abs(x) == std::numeric_limits<RealType>::infinity())
+   //if(boost::math::numeric_limits<RealType>::has_infinity && abs(x) == boost::math::numeric_limits<RealType>::infinity())
    //{ // pdf + and - infinity is zero.
    // return 0;
    //}
@@ -250,111 +269,112 @@ inline RealType pdf(const cauchy_distribution<RealType, Policy>& dist, const Rea
 } // pdf

 template <class RealType, class Policy>
-inline RealType cdf(const cauchy_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const cauchy_distribution<RealType, Policy>& dist, const RealType& x)
 {
    return detail::cdf_imp(dist, x, false);
 } // cdf

 template <class RealType, class Policy>
-inline RealType quantile(const cauchy_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const cauchy_distribution<RealType, Policy>& dist, const RealType& p)
 {
    return detail::quantile_imp(dist, p, false);
 } // quantile

 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<cauchy_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<cauchy_distribution<RealType, Policy>, RealType>& c)
 {
    return detail::cdf_imp(c.dist, c.param, true);
 } // cdf complement

 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<cauchy_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<cauchy_distribution<RealType, Policy>, RealType>& c)
 {
    return detail::quantile_imp(c.dist, c.param, true);
 } // quantile complement

 template <class RealType, class Policy>
-inline RealType mean(const cauchy_distribution<RealType, Policy>&)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const cauchy_distribution<RealType, Policy>&)
 {  // There is no mean:
    typedef typename Policy::assert_undefined_type assert_type;
-   static_assert(assert_type::value == 0, "assert type is undefined");
+   static_assert(assert_type::value == 0, "The Cauchy Distribution has no mean");

    return policies::raise_domain_error<RealType>(
      "boost::math::mean(cauchy<%1%>&)",
      "The Cauchy distribution does not have a mean: "
      "the only possible return value is %1%.",
-     std::numeric_limits<RealType>::quiet_NaN(), Policy());
+     boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
 }

 template <class RealType, class Policy>
-inline RealType variance(const cauchy_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const cauchy_distribution<RealType, Policy>& /*dist*/)
 {
    // There is no variance:
    typedef typename Policy::assert_undefined_type assert_type;
-   static_assert(assert_type::value == 0, "assert type is undefined");
+   static_assert(assert_type::value == 0, "The Cauchy Distribution has no variance");

    return policies::raise_domain_error<RealType>(
      "boost::math::variance(cauchy<%1%>&)",
      "The Cauchy distribution does not have a variance: "
      "the only possible return value is %1%.",
-     std::numeric_limits<RealType>::quiet_NaN(), Policy());
+     boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
 }

 template <class RealType, class Policy>
-inline RealType mode(const cauchy_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const cauchy_distribution<RealType, Policy>& dist)
 {
    return dist.location();
 }

 template <class RealType, class Policy>
-inline RealType median(const cauchy_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType median(const cauchy_distribution<RealType, Policy>& dist)
 {
    return dist.location();
 }
+
 template <class RealType, class Policy>
-inline RealType skewness(const cauchy_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const cauchy_distribution<RealType, Policy>& /*dist*/)
 {
    // There is no skewness:
    typedef typename Policy::assert_undefined_type assert_type;
-   static_assert(assert_type::value == 0, "assert type is undefined");
+   static_assert(assert_type::value == 0, "The Cauchy Distribution has no skewness");

    return policies::raise_domain_error<RealType>(
      "boost::math::skewness(cauchy<%1%>&)",
      "The Cauchy distribution does not have a skewness: "
      "the only possible return value is %1%.",
-     std::numeric_limits<RealType>::quiet_NaN(), Policy()); // infinity?
+     boost::math::numeric_limits<RealType>::quiet_NaN(), Policy()); // infinity?
 }

 template <class RealType, class Policy>
-inline RealType kurtosis(const cauchy_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const cauchy_distribution<RealType, Policy>& /*dist*/)
 {
    // There is no kurtosis:
    typedef typename Policy::assert_undefined_type assert_type;
-   static_assert(assert_type::value == 0, "assert type is undefined");
+   static_assert(assert_type::value == 0, "The Cauchy Distribution has no kurtosis");

    return policies::raise_domain_error<RealType>(
      "boost::math::kurtosis(cauchy<%1%>&)",
      "The Cauchy distribution does not have a kurtosis: "
      "the only possible return value is %1%.",
-     std::numeric_limits<RealType>::quiet_NaN(), Policy());
+     boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
 }

 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const cauchy_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const cauchy_distribution<RealType, Policy>& /*dist*/)
 {
    // There is no kurtosis excess:
    typedef typename Policy::assert_undefined_type assert_type;
-   static_assert(assert_type::value == 0, "assert type is undefined");
+   static_assert(assert_type::value == 0, "The Cauchy Distribution has no kurtosis excess");

    return policies::raise_domain_error<RealType>(
      "boost::math::kurtosis_excess(cauchy<%1%>&)",
      "The Cauchy distribution does not have a kurtosis: "
      "the only possible return value is %1%.",
-     std::numeric_limits<RealType>::quiet_NaN(), Policy());
+     boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
 }

 template <class RealType, class Policy>
-inline RealType entropy(const cauchy_distribution<RealType, Policy> & dist)
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const cauchy_distribution<RealType, Policy> & dist)
 {
    using std::log;
    return log(2*constants::two_pi<RealType>()*dist.scale());
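Since two_pi is 2*pi, the entropy returned above is log(4*pi*scale), the known differential entropy of the Cauchy distribution; for scale = 1 this is log(4*pi), roughly 2.531. A quick check:

    #include <boost/math/constants/constants.hpp>
    #include <boost/math/distributions/cauchy.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::cauchy_distribution<double> dist(0.0, 1.0);
        const double expected = std::log(4.0 * boost::math::constants::pi<double>());
        assert(std::abs(entropy(dist) - expected) < 1e-12);
    }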
diff --git a/include/boost/math/distributions/chi_squared.hpp b/include/boost/math/distributions/chi_squared.hpp
index f5daddc0ad..3944569e89 100644
--- a/include/boost/math/distributions/chi_squared.hpp
+++ b/include/boost/math/distributions/chi_squared.hpp
@@ -9,14 +9,17 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_CHI_SQUARED_HPP
 #define BOOST_MATH_DISTRIBUTIONS_CHI_SQUARED_HPP

+#include
+#include
+#include
+#include
+#include
 #include
 #include // for incomplete beta.
 #include // complements
 #include // error checks
 #include

-#include
-
 namespace boost{ namespace math{

 template <class RealType = double, class Policy = policies::policy<> >
@@ -26,20 +29,20 @@ class chi_squared_distribution
    using value_type = RealType;
    using policy_type = Policy;

-   explicit chi_squared_distribution(RealType i) : m_df(i)
+   BOOST_MATH_GPU_ENABLED explicit chi_squared_distribution(RealType i) : m_df(i)
    {
       RealType result;
       detail::check_df(
          "boost::math::chi_squared_distribution<%1%>::chi_squared_distribution", m_df, &result, Policy());
    } // chi_squared_distribution

-   RealType degrees_of_freedom()const
+   BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const
    {
      return m_df;
    }

    // Parameter estimation:
-   static RealType find_degrees_of_freedom(
+   BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(
       RealType difference_from_variance,
       RealType alpha,
       RealType beta,
@@ -66,16 +69,16 @@ chi_squared_distribution(RealType)->chi_squared_distribution

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> range(const chi_squared_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const chi_squared_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
-  if (std::numeric_limits<RealType>::has_infinity)
+  BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
  {
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), std::numeric_limits<RealType>::infinity()); // 0 to + infinity.
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), boost::math::numeric_limits<RealType>::infinity()); // 0 to + infinity.
  }
  else
  {
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // 0 to + max.
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // 0 to + max.
  }
 }
@@ -84,21 +87,21 @@ inline std::pair range(const chi_squared_distribution

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const chi_squared_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const chi_squared_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
   // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
-  return std::pair<RealType, RealType>(static_cast<RealType>(0), tools::max_value<RealType>()); // 0 to + infinity.
+  return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), tools::max_value<RealType>()); // 0 to + infinity.
 }

 template <class RealType, class Policy>
-RealType pdf(const chi_squared_distribution<RealType, Policy>& dist, const RealType& chi_square)
+BOOST_MATH_GPU_ENABLED RealType pdf(const chi_squared_distribution<RealType, Policy>& dist, const RealType& chi_square)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
    RealType degrees_of_freedom = dist.degrees_of_freedom();
    // Error check:
    RealType error_result;
-   static const char* function = "boost::math::pdf(const chi_squared_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const chi_squared_distribution<%1%>&, %1%)";
    if(false == detail::check_df(
          function, degrees_of_freedom, &error_result, Policy()))
@@ -132,12 +135,12 @@ RealType pdf(const chi_squared_distribution<RealType, Policy>& dist, const RealT
 } // pdf

 template <class RealType, class Policy>
-inline RealType cdf(const chi_squared_distribution<RealType, Policy>& dist, const RealType& chi_square)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const chi_squared_distribution<RealType, Policy>& dist, const RealType& chi_square)
 {
    RealType degrees_of_freedom = dist.degrees_of_freedom();
    // Error check:
    RealType error_result;
-   static const char* function = "boost::math::cdf(const chi_squared_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const chi_squared_distribution<%1%>&, %1%)";
    if(false == detail::check_df(
          function, degrees_of_freedom, &error_result, Policy()))
@@ -153,10 +156,10 @@ inline RealType cdf(const chi_squared_distribution<RealType, Policy>& dist, cons
 } // cdf

 template <class RealType, class Policy>
-inline RealType quantile(const chi_squared_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const chi_squared_distribution<RealType, Policy>& dist, const RealType& p)
 {
    RealType degrees_of_freedom = dist.degrees_of_freedom();
-   static const char* function = "boost::math::quantile(const chi_squared_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const chi_squared_distribution<%1%>&, %1%)";
    // Error check:
    RealType error_result;
    if(false ==
@@ -170,11 +173,11 @@ inline RealType quantile(const chi_squared_distribution<RealType, Policy>& dist,
 } // quantile

 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<chi_squared_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<chi_squared_distribution<RealType, Policy>, RealType>& c)
 {
    RealType const& degrees_of_freedom = c.dist.degrees_of_freedom();
    RealType const& chi_square = c.param;
-   static const char* function = "boost::math::cdf(const chi_squared_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const chi_squared_distribution<%1%>&, %1%)";
    // Error check:
    RealType error_result;
    if(false == detail::check_df(
@@ -191,11 +194,11 @@ inline RealType cdf(const complemented2_type
 }

 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<chi_squared_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<chi_squared_distribution<RealType, Policy>, RealType>& c)
 {
    RealType const& degrees_of_freedom = c.dist.degrees_of_freedom();
    RealType const& q = c.param;
function = "boost::math::quantile(const chi_squared_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const chi_squared_distribution<%1%>&, %1%)"; // Error check: RealType error_result; if(false == ( @@ -208,22 +211,22 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const chi_squared_distribution& dist) { // Mean of Chi-Squared distribution = v. return dist.degrees_of_freedom(); } // mean template -inline RealType variance(const chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType variance(const chi_squared_distribution& dist) { // Variance of Chi-Squared distribution = 2v. return 2 * dist.degrees_of_freedom(); } // variance template -inline RealType mode(const chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const chi_squared_distribution& dist) { RealType df = dist.degrees_of_freedom(); - static const char* function = "boost::math::mode(const chi_squared_distribution<%1%>&)"; + constexpr auto function = "boost::math::mode(const chi_squared_distribution<%1%>&)"; if(df < 2) return policies::raise_domain_error( @@ -234,7 +237,7 @@ inline RealType mode(const chi_squared_distribution& dist) } template -inline RealType skewness(const chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const chi_squared_distribution& dist) { BOOST_MATH_STD_USING // For ADL RealType df = dist.degrees_of_freedom(); @@ -242,14 +245,14 @@ inline RealType skewness(const chi_squared_distribution& dist) } template -inline RealType kurtosis(const chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const chi_squared_distribution& dist) { RealType df = dist.degrees_of_freedom(); return 3 + 12 / df; } template -inline RealType kurtosis_excess(const chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const chi_squared_distribution& dist) { RealType df = dist.degrees_of_freedom(); return 12 / df; @@ -264,12 +267,12 @@ namespace detail template struct df_estimator { - df_estimator(RealType a, RealType b, RealType variance, RealType delta) + BOOST_MATH_GPU_ENABLED df_estimator(RealType a, RealType b, RealType variance, RealType delta) : alpha(a), beta(b), ratio(delta/variance) { // Constructor } - RealType operator()(const RealType& df) + BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& df) { if(df <= tools::min_value()) return 1; @@ -297,14 +300,14 @@ struct df_estimator } // namespace detail template -RealType chi_squared_distribution::find_degrees_of_freedom( +BOOST_MATH_GPU_ENABLED RealType chi_squared_distribution::find_degrees_of_freedom( RealType difference_from_variance, RealType alpha, RealType beta, RealType variance, RealType hint) { - static const char* function = "boost::math::chi_squared_distribution<%1%>::find_degrees_of_freedom(%1%,%1%,%1%,%1%,%1%)"; + constexpr auto function = "boost::math::chi_squared_distribution<%1%>::find_degrees_of_freedom(%1%,%1%,%1%,%1%,%1%)"; // Check for domain errors: RealType error_result; if(false == @@ -321,8 +324,8 @@ RealType chi_squared_distribution::find_degrees_of_freedom( detail::df_estimator f(alpha, beta, variance, difference_from_variance); tools::eps_tolerance tol(policies::digits()); - std::uintmax_t max_iter = policies::get_max_root_iterations(); - std::pair r = + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::pair r = 
diff --git a/include/boost/math/distributions/complement.hpp b/include/boost/math/distributions/complement.hpp
index 5c062a7cdf..c63b8a5041 100644
--- a/include/boost/math/distributions/complement.hpp
+++ b/include/boost/math/distributions/complement.hpp
@@ -1,5 +1,6 @@
 //  (C) Copyright John Maddock 2006.
 //  (C) Copyright Paul A. Bristow 2006.
+//  (C) Copyright Matt Borland 2024
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -7,6 +8,8 @@
 #ifndef BOOST_STATS_COMPLEMENT_HPP
 #define BOOST_STATS_COMPLEMENT_HPP

+#include
+
 //
 // This code really defines our own tuple type.
 // It would be nice to reuse boost::math::tuple
@@ -19,7 +22,7 @@ namespace boost{ namespace math{

 template <class Dist, class RealType>
 struct complemented2_type
 {
-   complemented2_type(
+   BOOST_MATH_GPU_ENABLED complemented2_type(
       const Dist& d,
       const RealType& p1)
      : dist(d),
@@ -35,7 +38,7 @@ struct complemented2_type

 template <class Dist, class RealType1, class RealType2>
 struct complemented3_type
 {
-   complemented3_type(
+   BOOST_MATH_GPU_ENABLED complemented3_type(
      const Dist& d,
      const RealType1& p1,
      const RealType2& p2)
@@ -53,7 +56,7 @@ struct complemented3_type

 template <class Dist, class RealType1, class RealType2, class RealType3>
 struct complemented4_type
 {
-   complemented4_type(
+   BOOST_MATH_GPU_ENABLED complemented4_type(
      const Dist& d,
      const RealType1& p1,
      const RealType2& p2,
@@ -74,7 +77,7 @@ struct complemented4_type

 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4>
 struct complemented5_type
 {
-   complemented5_type(
+   BOOST_MATH_GPU_ENABLED complemented5_type(
      const Dist& d,
      const RealType1& p1,
      const RealType2& p2,
@@ -98,7 +101,7 @@ struct complemented5_type

 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4, class RealType5>
 struct complemented6_type
 {
-   complemented6_type(
+   BOOST_MATH_GPU_ENABLED complemented6_type(
      const Dist& d,
      const RealType1& p1,
      const RealType2& p2,
@@ -125,7 +128,7 @@ struct complemented6_type

 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4, class RealType5, class RealType6>
 struct complemented7_type
 {
-   complemented7_type(
+   BOOST_MATH_GPU_ENABLED complemented7_type(
      const Dist& d,
      const RealType1& p1,
      const RealType2& p2,
@@ -153,37 +156,37 @@ struct complemented7_type
 };

 template <class Dist, class RealType>
-inline complemented2_type<Dist, RealType> complement(const Dist& d, const RealType& r)
+BOOST_MATH_GPU_ENABLED inline complemented2_type<Dist, RealType> complement(const Dist& d, const RealType& r)
 {
    return complemented2_type<Dist, RealType>(d, r);
 }

 template <class Dist, class RealType1, class RealType2>
-inline complemented3_type<Dist, RealType1, RealType2> complement(const Dist& d, const RealType1& r1, const RealType2& r2)
+BOOST_MATH_GPU_ENABLED inline complemented3_type<Dist, RealType1, RealType2> complement(const Dist& d, const RealType1& r1, const RealType2& r2)
 {
    return complemented3_type<Dist, RealType1, RealType2>(d, r1, r2);
 }

 template <class Dist, class RealType1, class RealType2, class RealType3>
-inline complemented4_type<Dist, RealType1, RealType2, RealType3> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3)
+BOOST_MATH_GPU_ENABLED inline complemented4_type<Dist, RealType1, RealType2, RealType3> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3)
 {
    return complemented4_type<Dist, RealType1, RealType2, RealType3>(d, r1, r2, r3);
 }

 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4>
-inline complemented5_type<Dist, RealType1, RealType2, RealType3, RealType4> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4)
+BOOST_MATH_GPU_ENABLED inline complemented5_type<Dist, RealType1, RealType2, RealType3, RealType4> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4)
 {
    return complemented5_type<Dist, RealType1, RealType2, RealType3, RealType4>(d, r1, r2, r3, r4);
 }

 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4, class RealType5>
-inline complemented6_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5)
+BOOST_MATH_GPU_ENABLED inline complemented6_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5)
 {
    return complemented6_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5>(d, r1, r2, r3, r4, r5);
 }

 template <class Dist, class RealType1, class RealType2, class RealType3, class RealType4, class RealType5, class RealType6>
-inline complemented7_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5, RealType6> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5, const RealType6& r6)
+BOOST_MATH_GPU_ENABLED inline complemented7_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5, RealType6> complement(const Dist& d, const RealType1& r1, const RealType2& r2, const RealType3& r3, const RealType4& r4, const RealType5& r5, const RealType6& r6)
 {
    return complemented7_type<Dist, RealType1, RealType2, RealType3, RealType4, RealType5, RealType6>(d, r1, r2, r3, r4, r5, r6);
 }
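The complement() helpers exist so that upper-tail probabilities can be written cdf(complement(dist, x)) and computed internally without the cancellation error of 1 - cdf(dist, x). For example, deep in the upper tail of an exponential distribution:

    #include <boost/math/distributions/exponential.hpp>
    #include <iostream>

    int main()
    {
        boost::math::exponential_distribution<double> dist(2.0); // lambda = 2
        // P(X > 20) = exp(-40), about 4.25e-18: far below double epsilon,
        // so 1 - cdf(dist, 20.0) would round away all precision, while
        // cdf(complement(dist, 20.0)) returns it accurately.
        std::cout << cdf(complement(dist, 20.0)) << '\n';
    }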
diff --git a/include/boost/math/distributions/detail/common_error_handling.hpp b/include/boost/math/distributions/detail/common_error_handling.hpp
index f03f2c49b8..06e3c105bd 100644
--- a/include/boost/math/distributions/detail/common_error_handling.hpp
+++ b/include/boost/math/distributions/detail/common_error_handling.hpp
@@ -1,5 +1,6 @@
 // Copyright John Maddock 2006, 2007.
 // Copyright Paul A. Bristow 2006, 2007, 2012.
+// Copyright Matt Borland 2024

 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
@@ -9,6 +10,8 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_COMMON_ERROR_HANDLING_HPP
 #define BOOST_MATH_DISTRIBUTIONS_COMMON_ERROR_HANDLING_HPP

+#include
+#include
 #include
 #include // using boost::math::isfinite;
@@ -23,7 +26,7 @@ namespace boost{ namespace math{ namespace detail
 {

 template <class RealType, class Policy>
-inline bool check_probability(const char* function, RealType const& prob, RealType* result, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool check_probability(const char* function, RealType const& prob, RealType* result, const Policy& pol)
 {
    if((prob < 0) || (prob > 1) || !(boost::math::isfinite)(prob))
    {
@@ -36,7 +39,7 @@ inline bool check_probability(const char* function, RealType const& prob, RealTy
 }

 template <class RealType, class Policy>
-inline bool check_df(const char* function, RealType const& df, RealType* result, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool check_df(const char* function, RealType const& df, RealType* result, const Policy& pol)
 { // df > 0 but NOT +infinity allowed.
    if((df <= 0) || !(boost::math::isfinite)(df))
    {
@@ -49,7 +52,7 @@ inline bool check_df(const char* function, RealType const& df, RealType* result,
 }

 template <class RealType, class Policy>
-inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealType* result, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealType* result, const Policy& pol)
 { // df > 0 or +infinity are allowed.
    if( (df <= 0) || (boost::math::isnan)(df) )
    { // is bad df <= 0 or NaN or -infinity.
@@ -63,7 +66,7 @@ inline bool check_df_gt0_to_inf(const char* function, RealType const& df, RealTy

 template <class RealType, class Policy>
-inline bool check_scale(
+BOOST_MATH_GPU_ENABLED inline bool check_scale(
    const char* function,
    RealType scale,
    RealType* result,
@@ -80,7 +83,7 @@ inline bool check_scale(
 }

 template <class RealType, class Policy>
-inline bool check_location(
+BOOST_MATH_GPU_ENABLED inline bool check_location(
    const char* function,
    RealType location,
    RealType* result,
@@ -97,7 +100,7 @@ inline bool check_location(
 }

 template <class RealType, class Policy>
-inline bool check_x(
+BOOST_MATH_GPU_ENABLED inline bool check_x(
    const char* function,
    RealType x,
    RealType* result,
@@ -118,7 +121,7 @@ inline bool check_x(
 } // bool check_x

 template <class RealType, class Policy>
-inline bool check_x_not_NaN(
+BOOST_MATH_GPU_ENABLED inline bool check_x_not_NaN(
    const char* function,
    RealType x,
    RealType* result,
@@ -138,7 +141,7 @@ inline bool check_x_not_NaN(
 } // bool check_x_not_NaN

 template <class RealType, class Policy>
-inline bool check_x_gt0(
+BOOST_MATH_GPU_ENABLED inline bool check_x_gt0(
    const char* function,
    RealType x,
    RealType* result,
@@ -159,7 +162,7 @@ inline bool check_x_gt0(
 } // bool check_x_gt0

 template <class RealType, class Policy>
-inline bool check_positive_x(
+BOOST_MATH_GPU_ENABLED inline bool check_positive_x(
    const char* function,
    RealType x,
    RealType* result,
@@ -179,13 +182,14 @@ inline bool check_positive_x(
 }

 template <class RealType, class Policy>
-inline bool check_non_centrality(
+BOOST_MATH_GPU_ENABLED inline bool check_non_centrality(
    const char* function,
    RealType ncp,
    RealType* result,
    const Policy& pol)
 {
-   static const RealType upper_limit = static_cast<RealType>((std::numeric_limits<long long>::max)()) - boost::math::policies::get_max_root_iterations<Policy>();
+   BOOST_MATH_STATIC const RealType upper_limit = static_cast<RealType>((boost::math::numeric_limits<long long>::max)()) - boost::math::policies::get_max_root_iterations<Policy>();
+
    if((ncp < 0) || !(boost::math::isfinite)(ncp) || ncp > upper_limit)
    {
       *result = policies::raise_domain_error<RealType>(
@@ -197,7 +201,7 @@ inline bool check_non_centrality(
 }

 template <class RealType, class Policy>
-inline bool check_finite(
+BOOST_MATH_GPU_ENABLED inline bool check_finite(
    const char* function,
    RealType x,
    RealType* result,
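All of these checkers share one calling convention: return true when the argument is usable, otherwise invoke the error policy, write the resulting error value through *result, and return false so the caller can bail out with a single if. An illustrative stand-in for the pattern (not the library's code; check_scale_like is hypothetical):

    #include <cmath>
    #include <limits>

    // Returns true on success; on failure writes NaN through *result.
    template <class RealType>
    bool check_scale_like(RealType scale, RealType* result)
    {
        if(!(scale > 0) || !std::isfinite(scale))
        {
            *result = std::numeric_limits<RealType>::quiet_NaN();
            return false; // caller does: if(false == check...) return result;
        }
        return true;
    }

    int main()
    {
        double r = 0;
        return check_scale_like(-1.0, &r) ? 0 : 1; // -1 is rejected
    }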
diff --git a/include/boost/math/distributions/detail/derived_accessors.hpp b/include/boost/math/distributions/detail/derived_accessors.hpp
index eb76409a1c..90679ef21f 100644
--- a/include/boost/math/distributions/detail/derived_accessors.hpp
+++ b/include/boost/math/distributions/detail/derived_accessors.hpp
@@ -1,4 +1,5 @@
 //  Copyright John Maddock 2006.
+//  Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -27,9 +28,13 @@
 // can find the definitions referred to herein.
 //

-#include
+#include
 #include

+#ifndef BOOST_MATH_HAS_NVRTC
+#include
+#endif
+
 #ifdef _MSC_VER
 # pragma warning(push)
 # pragma warning(disable: 4723) // potential divide by 0
@@ -39,24 +44,24 @@ namespace boost{ namespace math{

 template <class Distribution>
-typename Distribution::value_type variance(const Distribution& dist);
+BOOST_MATH_GPU_ENABLED typename Distribution::value_type variance(const Distribution& dist);

 template <class Distribution>
-inline typename Distribution::value_type standard_deviation(const Distribution& dist)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type standard_deviation(const Distribution& dist)
 {
    BOOST_MATH_STD_USING  // ADL of sqrt.
    return sqrt(variance(dist));
 }

 template <class Distribution>
-inline typename Distribution::value_type variance(const Distribution& dist)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type variance(const Distribution& dist)
 {
    typename Distribution::value_type result = standard_deviation(dist);
    return result * result;
 }

 template <class Distribution, class RealType>
-inline typename Distribution::value_type hazard(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type hazard(const Distribution& dist, const RealType& x)
 { // hazard function
   // http://www.itl.nist.gov/div898/handbook/eda/section3/eda362.htm#HAZ
    typedef typename Distribution::value_type value_type;
@@ -75,7 +80,7 @@ inline typename Distribution::value_type hazard(const Distribution& dist, const
 }

 template <class Distribution, class RealType>
-inline typename Distribution::value_type chf(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type chf(const Distribution& dist, const RealType& x)
 { // cumulative hazard function.
   // http://www.itl.nist.gov/div898/handbook/eda/section3/eda362.htm#HAZ
    BOOST_MATH_STD_USING
@@ -83,7 +88,7 @@ inline typename Distribution::value_type chf(const Distribution& dist, const Rea
 }

 template <class Distribution>
-inline typename Distribution::value_type coefficient_of_variation(const Distribution& dist)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type coefficient_of_variation(const Distribution& dist)
 {
    typedef typename Distribution::value_type value_type;
    typedef typename Distribution::policy_type policy_type;
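For reference, the derived accessors above implement the hazard h(x) = pdf(x) / (1 - cdf(x)) and the cumulative hazard chf(x) = -log(1 - cdf(x)). The exponential distribution makes a convenient check, since its hazard is the constant lambda and its cumulative hazard is lambda*x:

    #include <boost/math/distributions/exponential.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::exponential_distribution<double> dist(2.0);
        assert(std::abs(hazard(dist, 1.5) - 2.0) < 1e-12); // h(x) = lambda
        assert(std::abs(chf(dist, 1.5) - 3.0) < 1e-12);    // H(x) = lambda * x
    }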
@@ -104,33 +109,33 @@ inline typename Distribution::value_type coefficient_of_variation(const Distribu
 // implementation with all arguments of the same type:
 //
 template <class Distribution, class RealType>
-inline typename Distribution::value_type pdf(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type pdf(const Distribution& dist, const RealType& x)
 {
    typedef typename Distribution::value_type value_type;
    return pdf(dist, static_cast<value_type>(x));
 }
 template <class Distribution, class RealType>
-inline typename Distribution::value_type logpdf(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type logpdf(const Distribution& dist, const RealType& x)
 {
    using std::log;
    typedef typename Distribution::value_type value_type;
    return log(pdf(dist, static_cast<value_type>(x)));
 }
 template <class Distribution, class RealType>
-inline typename Distribution::value_type cdf(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type cdf(const Distribution& dist, const RealType& x)
 {
    typedef typename Distribution::value_type value_type;
    return cdf(dist, static_cast<value_type>(x));
 }
 template <class Distribution, class Realtype>
-inline typename Distribution::value_type logcdf(const Distribution& dist, const Realtype& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type logcdf(const Distribution& dist, const Realtype& x)
 {
    using std::log;
    using value_type = typename Distribution::value_type;
    return log(cdf(dist, static_cast<value_type>(x)));
 }
 template <class Distribution, class RealType>
-inline typename Distribution::value_type quantile(const Distribution& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type quantile(const Distribution& dist, const RealType& x)
 {
    typedef typename Distribution::value_type value_type;
    return quantile(dist, static_cast<value_type>(x));
@@ -144,14 +149,14 @@ inline typename Distribution::value_type chf(const Distribution& dist, const Rea
 }
 */
 template <class Distribution, class RealType>
-inline typename Distribution::value_type cdf(const complemented2_type<Distribution, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type cdf(const complemented2_type<Distribution, RealType>& c)
 {
    typedef typename Distribution::value_type value_type;
    return cdf(complement(c.dist, static_cast<value_type>(c.param)));
 }

 template <class Distribution, class RealType>
-inline typename Distribution::value_type logcdf(const complemented2_type<Distribution, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type logcdf(const complemented2_type<Distribution, RealType>& c)
 {
    using std::log;
    typedef typename Distribution::value_type value_type;
@@ -159,14 +164,14 @@ inline typename Distribution::value_type logcdf(const complemented2_type
 }

 template <class Distribution, class RealType>
-inline typename Distribution::value_type quantile(const complemented2_type<Distribution, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline typename Distribution::value_type quantile(const complemented2_type<Distribution, RealType>& c)
 {
    typedef typename Distribution::value_type value_type;
    return quantile(complement(c.dist, static_cast<value_type>(c.param)));
 }

 template <class Dist>
-inline typename Dist::value_type median(const Dist& d)
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type median(const Dist& d)
 { // median - default definition for those distributions for which a
   // simple closed form is not known,
   // and for which a domain_error and/or NaN generating function is NOT defined.
diff --git a/include/boost/math/distributions/detail/generic_mode.hpp b/include/boost/math/distributions/detail/generic_mode.hpp
index 19c8b2af01..9306c815da 100644
--- a/include/boost/math/distributions/detail/generic_mode.hpp
+++ b/include/boost/math/distributions/detail/generic_mode.hpp
@@ -8,19 +8,22 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_DETAIL_MODE_HPP
 #define BOOST_MATH_DISTRIBUTIONS_DETAIL_MODE_HPP

+#include
+#include
 #include // function minimization for mode
 #include
 #include
+#include

 namespace boost{ namespace math{ namespace detail{

 template <class Dist>
 struct pdf_minimizer
 {
-   pdf_minimizer(const Dist& d)
+   BOOST_MATH_GPU_ENABLED pdf_minimizer(const Dist& d)
      : dist(d) {}

-   typename Dist::value_type operator()(const typename Dist::value_type& x)
+   BOOST_MATH_GPU_ENABLED typename Dist::value_type operator()(const typename Dist::value_type& x)
   {
      return -pdf(dist, x);
   }
@@ -29,7 +32,7 @@ struct pdf_minimizer
 }

 template <class Dist>
-typename Dist::value_type generic_find_mode(const Dist& dist, typename Dist::value_type guess, const char* function, typename Dist::value_type step = 0)
+BOOST_MATH_GPU_ENABLED typename Dist::value_type generic_find_mode(const Dist& dist, typename Dist::value_type guess, const char* function, typename Dist::value_type step = 0)
 {
    BOOST_MATH_STD_USING
    typedef typename Dist::value_type value_type;
@@ -70,7 +73,7 @@ typename Dist::value_type generic_find_mode(const Dist& dist, typename Dist::val
       v = pdf(dist, lower_bound);
    }while(maxval < v);

-   std::uintmax_t max_iter = policies::get_max_root_iterations<policy_type>();
+   boost::math::uintmax_t max_iter = policies::get_max_root_iterations<policy_type>();

    value_type result = tools::brent_find_minima(
       pdf_minimizer<Dist>(dist),
@@ -90,7 +93,7 @@ typename Dist::value_type generic_find_mode(const Dist& dist, typename Dist::val
 //
 // As above,but confined to the interval [0,1]:
 //
 template <class Dist>
-typename Dist::value_type generic_find_mode_01(const Dist& dist, typename Dist::value_type guess, const char* function)
+BOOST_MATH_GPU_ENABLED typename Dist::value_type generic_find_mode_01(const Dist& dist, typename Dist::value_type guess, const char* function)
 {
    BOOST_MATH_STD_USING
    typedef typename Dist::value_type value_type;
@@ -121,7 +124,7 @@ typename Dist::value_type generic_find_mode_01(const Dist& dist, typename Dist::
       v = pdf(dist, lower_bound);
    }while(maxval < v);

-   std::uintmax_t max_iter = policies::get_max_root_iterations<policy_type>();
+   boost::math::uintmax_t max_iter = policies::get_max_root_iterations<policy_type>();

    value_type result = tools::brent_find_minima(
       pdf_minimizer<Dist>(dist),
diff --git a/include/boost/math/distributions/detail/generic_quantile.hpp b/include/boost/math/distributions/detail/generic_quantile.hpp
index 438ac952f0..917532566f 100644
--- a/include/boost/math/distributions/detail/generic_quantile.hpp
+++ b/include/boost/math/distributions/detail/generic_quantile.hpp
@@ -6,6 +6,10 @@
 #ifndef BOOST_MATH_DISTIBUTIONS_DETAIL_GENERIC_QUANTILE_HPP
 #define BOOST_MATH_DISTIBUTIONS_DETAIL_GENERIC_QUANTILE_HPP

+#include
+#include
+#include
+
 namespace boost{ namespace math{ namespace detail{

 template <class Dist>
@@ -14,10 +18,10 @@ struct generic_quantile_finder
    using value_type = typename Dist::value_type;
    using policy_type = typename Dist::policy_type;

-   generic_quantile_finder(const Dist& d, value_type t, bool c)
+   BOOST_MATH_GPU_ENABLED generic_quantile_finder(const Dist& d, value_type t, bool c)
      : dist(d), target(t), comp(c) {}

-   value_type operator()(const value_type& x)
+   BOOST_MATH_GPU_ENABLED value_type operator()(const value_type& x)
   {
      return comp
        ? value_type(target - cdf(complement(dist, x)))
@@ -31,7 +35,7 @@
 };

 template <class T, class Policy>
-inline T check_range_result(const T& x, const Policy& pol, const char* function)
+BOOST_MATH_GPU_ENABLED inline T check_range_result(const T& x, const Policy& pol, const char* function)
 {
    if((x >= 0) && (x < tools::min_value<T>()))
    {
@@ -49,7 +53,7 @@ inline T check_range_result(const T& x, const Policy& pol, const char* function)
 }

 template <class Dist>
-typename Dist::value_type generic_quantile(const Dist& dist, const typename Dist::value_type& p, const typename Dist::value_type& guess, bool comp, const char* function)
+BOOST_MATH_GPU_ENABLED typename Dist::value_type generic_quantile(const Dist& dist, const typename Dist::value_type& p, const typename Dist::value_type& guess, bool comp, const char* function)
 {
    using value_type = typename Dist::value_type;
    using policy_type = typename Dist::policy_type;
@@ -78,8 +82,8 @@ typename Dist::value_type generic_quantile(const Dist& dist, const typename Dist
    generic_quantile_finder<Dist> f(dist, p, comp);
    tools::eps_tolerance<value_type> tol(policies::digits<value_type, forwarding_policy>() - 3);
-   std::uintmax_t max_iter = policies::get_max_root_iterations<forwarding_policy>();
-   std::pair<value_type, value_type> ir = tools::bracket_and_solve_root(
+   boost::math::uintmax_t max_iter = policies::get_max_root_iterations<forwarding_policy>();
+   boost::math::pair<value_type, value_type> ir = tools::bracket_and_solve_root(
       f, guess, value_type(2), true, tol, max_iter, forwarding_policy());
    value_type result = ir.first + (ir.second - ir.first) / 2;
    if(max_iter >= policies::get_max_root_iterations<forwarding_policy>())
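generic_quantile inverts the CDF numerically: it wraps cdf(dist, x) - p (or the complemented form) in a function object, brackets and solves for the root, and returns the midpoint of the final bracket. The same idea in a stripped-down form, without the policy and error-handling plumbing of the real implementation:

    #include <boost/math/distributions/chi_squared.hpp>
    #include <boost/math/tools/toms748_solve.hpp>
    #include <cstdint>
    #include <iostream>

    int main()
    {
        boost::math::chi_squared_distribution<double> dist(5.0);
        const double p = 0.95;
        auto f = [&](double x) { return cdf(dist, x) - p; };
        std::uintmax_t max_iter = 200;
        boost::math::tools::eps_tolerance<double> tol(50);
        auto r = boost::math::tools::bracket_and_solve_root(
            f, 5.0 /*guess*/, 2.0 /*growth factor*/, true /*rising*/, tol, max_iter);
        // Midpoint of the bracket, about 11.07, matching quantile(dist, p):
        std::cout << (r.first + r.second) / 2 << '\n';
    }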
diff --git a/include/boost/math/distributions/detail/inv_discrete_quantile.hpp b/include/boost/math/distributions/detail/inv_discrete_quantile.hpp
index 739a866660..ac4a2b2318 100644
--- a/include/boost/math/distributions/detail/inv_discrete_quantile.hpp
+++ b/include/boost/math/distributions/detail/inv_discrete_quantile.hpp
@@ -6,7 +6,11 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_DETAIL_INV_DISCRETE_QUANTILE
 #define BOOST_MATH_DISTRIBUTIONS_DETAIL_INV_DISCRETE_QUANTILE

-#include
+#include
+#include
+#include
+#include
+#include

 namespace boost{ namespace math{ namespace detail{

@@ -19,10 +23,10 @@ struct distribution_quantile_finder
    typedef typename Dist::value_type value_type;
    typedef typename Dist::policy_type policy_type;

-   distribution_quantile_finder(const Dist d, value_type p, bool c)
+   BOOST_MATH_GPU_ENABLED distribution_quantile_finder(const Dist d, value_type p, bool c)
      : dist(d), target(p), comp(c) {}

-   value_type operator()(value_type const& x)
+   BOOST_MATH_GPU_ENABLED value_type operator()(value_type const& x)
   {
      return comp ? value_type(target - cdf(complement(dist, x))) : value_type(cdf(dist, x) - target);
   }
@@ -42,24 +46,24 @@ struct distribution_quantile_finder
 // in the root no longer being bracketed.
 //
 template <class Real, class Tol>
-void adjust_bounds(Real& /* a */, Real& /* b */, Tol const& /* tol */){}
+BOOST_MATH_GPU_ENABLED void adjust_bounds(Real& /* a */, Real& /* b */, Tol const& /* tol */){}

 template <class Real>
-void adjust_bounds(Real& /* a */, Real& b, tools::equal_floor const& /* tol */)
+BOOST_MATH_GPU_ENABLED void adjust_bounds(Real& /* a */, Real& b, tools::equal_floor const& /* tol */)
 {
    BOOST_MATH_STD_USING
    b -= tools::epsilon<Real>() * b;
 }

 template <class Real>
-void adjust_bounds(Real& a, Real& /* b */, tools::equal_ceil const& /* tol */)
+BOOST_MATH_GPU_ENABLED void adjust_bounds(Real& a, Real& /* b */, tools::equal_ceil const& /* tol */)
 {
    BOOST_MATH_STD_USING
    a += tools::epsilon<Real>() * a;
 }

 template <class Real>
-void adjust_bounds(Real& a, Real& b, tools::equal_nearest_integer const& /* tol */)
+BOOST_MATH_GPU_ENABLED void adjust_bounds(Real& a, Real& b, tools::equal_nearest_integer const& /* tol */)
 {
    BOOST_MATH_STD_USING
    a += tools::epsilon<Real>() * a;
@@ -69,7 +73,7 @@ void adjust_bounds(Real& a, Real& b, tools::equal_nearest_integer const& /* tol
 // This is where all the work is done:
 //
 template <class Dist, class Tolerance>
-typename Dist::value_type
+BOOST_MATH_GPU_ENABLED typename Dist::value_type
    do_inverse_discrete_quantile(
       const Dist& dist,
       const typename Dist::value_type& p,
@@ -78,12 +82,12 @@ typename Dist::value_type
       const typename Dist::value_type& multiplier,
       typename Dist::value_type adder,
       const Tolerance& tol,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    typedef typename Dist::policy_type policy_type;

-   static const char* function = "boost::math::do_inverse_discrete_quantile<%1%>";
+   constexpr auto function = "boost::math::do_inverse_discrete_quantile<%1%>";

    BOOST_MATH_STD_USING
@@ -100,7 +104,7 @@ typename Dist::value_type
       guess = min_bound;

    value_type fa = f(guess);
-   std::uintmax_t count = max_iter - 1;
+   boost::math::uintmax_t count = max_iter - 1;
    value_type fb(fa), a(guess), b =0; // Compiler warning C4701: potentially uninitialized local variable 'b' used
    if(fa == 0)
@@ -130,7 +134,7 @@ typename Dist::value_type
    else
    {
       b = a;
-      a = (std::max)(value_type(b - 1), value_type(0));
+      a = BOOST_MATH_GPU_SAFE_MAX(value_type(b - 1), value_type(0));
       if(a < min_bound)
          a = min_bound;
       fa = f(a);
@@ -153,7 +157,7 @@ typename Dist::value_type
    // If we're looking for a large result, then bump "adder" up
    // by a bit to increase our chances of bracketing the root:
    //
-   //adder = (std::max)(adder, 0.001f * guess);
+   //adder = BOOST_MATH_GPU_SAFE_MAX(adder, 0.001f * guess);
    if(fa < 0)
    {
       b = a + adder;
@@ -162,7 +166,7 @@ typename Dist::value_type
    }
    else
    {
-      b = (std::max)(value_type(a - adder), value_type(0));
+      b = BOOST_MATH_GPU_SAFE_MAX(value_type(a - adder), value_type(0));
      if(b < min_bound)
         b = min_bound;
    }
@@ -186,7 +190,7 @@ typename Dist::value_type
    }
    else
    {
-      b = (std::max)(value_type(a - adder), value_type(0));
+      b = BOOST_MATH_GPU_SAFE_MAX(value_type(a - adder), value_type(0));
      if(b < min_bound)
         b = min_bound;
    }
@@ -195,9 +199,8 @@ typename Dist::value_type
    }
    if(a > b)
    {
-      using std::swap;
-      swap(a, b);
-      swap(fa, fb);
+      BOOST_MATH_GPU_SAFE_SWAP(a, b);
+      BOOST_MATH_GPU_SAFE_SWAP(fa, fb);
    }
 }
 //
@@ -274,7 +277,7 @@ typename Dist::value_type
 //
 // Go ahead and find the root:
 //
-   std::pair<value_type, value_type> r = toms748_solve(f, a, b, fa, fb, tol, count, policy_type());
+   boost::math::pair<value_type, value_type> r = toms748_solve(f, a, b, fa, fb, tol, count, policy_type());
    max_iter += count;
    if (max_iter >= policies::get_max_root_iterations<policy_type>())
    {
@@ -293,7 +296,7 @@ typename Dist::value_type
 // is very close 1.
 //
 template <class Dist>
-inline typename Dist::value_type round_to_floor(const Dist& d, typename Dist::value_type result, typename Dist::value_type p, bool c)
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type round_to_floor(const Dist& d, typename Dist::value_type result, typename Dist::value_type p, bool c)
 {
    BOOST_MATH_STD_USING
    typename Dist::value_type cc = ceil(result);
@@ -307,7 +310,11 @@ inline typename Dist::value_type round_to_floor(const Dist& d, typename Dist::va
    //
    while(result != 0)
    {
+      #ifdef BOOST_MATH_HAS_GPU_SUPPORT
+      cc = floor(::nextafter(result, -tools::max_value<typename Dist::value_type>()));
+      #else
      cc = floor(float_prior(result));
+      #endif
      if(cc < support(d).first)
         break;
      pp = c ? cdf(complement(d, cc)) : cdf(d, cc);
@@ -325,7 +332,7 @@ inline typename Dist::value_type round_to_floor(const Dist& d, typename Dist::va
 #endif

 template <class Dist>
-inline typename Dist::value_type round_to_ceil(const Dist& d, typename Dist::value_type result, typename Dist::value_type p, bool c)
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type round_to_ceil(const Dist& d, typename Dist::value_type result, typename Dist::value_type p, bool c)
 {
    BOOST_MATH_STD_USING
    typename Dist::value_type cc = floor(result);
@@ -339,7 +346,11 @@ inline typename Dist::value_type round_to_ceil(const Dist& d, typename Dist::val
    //
    while(true)
    {
+      #ifdef BOOST_MATH_HAS_GPU_SUPPORT
+      cc = ceil(::nextafter(result, tools::max_value<typename Dist::value_type>()));
+      #else
      cc = ceil(float_next(result));
+      #endif
      if(cc > support(d).second)
         break;
      pp = c ? cdf(complement(d, cc)) : cdf(d, cc);
@@ -362,7 +373,7 @@ inline typename Dist::value_type round_to_ceil(const Dist& d, typename Dist::val
 // to an int where required.
 //
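round_to_floor and round_to_ceil implement the final step of the discrete-quantile machinery: nudging the continuous root to the integer demanded by the selected discrete_quantile policy (real, integer_round_outwards which is the documented default, integer_round_inwards, integer_round_down, integer_round_up, or integer_round_nearest). Selecting a policy at the distribution level looks like this:

    #include <boost/math/distributions/binomial.hpp>
    #include <boost/math/policies/policy.hpp>
    #include <iostream>

    int main()
    {
        using namespace boost::math;
        using namespace boost::math::policies;

        // Ask for the real-valued (un-rounded) quantile:
        binomial_distribution<double, policy<discrete_quantile<real>>> d1(50, 0.5);
        std::cout << quantile(d1, 0.05) << '\n'; // fractional, close to 19

        // The default policy rounds outwards, giving a whole number:
        binomial_distribution<double> d2(50, 0.5);
        std::cout << quantile(d2, 0.05) << '\n'; // 19
    }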
 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
    inverse_discrete_quantile(
       const Dist& dist,
       typename Dist::value_type p,
@@ -371,7 +382,7 @@ inline typename Dist::value_type
       const typename Dist::value_type& multiplier,
       const typename Dist::value_type& adder,
       const policies::discrete_quantile<policies::real>&,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    if(p > 0.5)
    {
@@ -393,7 +404,7 @@ inline typename Dist::value_type
 }

 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
    inverse_discrete_quantile(
       const Dist& dist,
       const typename Dist::value_type& p,
@@ -402,7 +413,7 @@ inline typename Dist::value_type
       const typename Dist::value_type& multiplier,
       const typename Dist::value_type& adder,
       const policies::discrete_quantile<policies::integer_round_outwards>&,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    BOOST_MATH_STD_USING
@@ -436,7 +447,7 @@ inline typename Dist::value_type
 }

 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
    inverse_discrete_quantile(
       const Dist& dist,
       const typename Dist::value_type& p,
@@ -445,7 +456,7 @@ inline typename Dist::value_type
       const typename Dist::value_type& multiplier,
       const typename Dist::value_type& adder,
       const policies::discrete_quantile<policies::integer_round_inwards>&,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    BOOST_MATH_STD_USING
@@ -479,7 +490,7 @@ inline typename Dist::value_type
 }

 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
    inverse_discrete_quantile(
       const Dist& dist,
       const typename Dist::value_type& p,
@@ -488,7 +499,7 @@ inline typename Dist::value_type
       const typename Dist::value_type& multiplier,
       const typename Dist::value_type& adder,
       const policies::discrete_quantile<policies::integer_round_down>&,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    BOOST_MATH_STD_USING
@@ -507,7 +518,7 @@ inline typename Dist::value_type
 }

 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
    inverse_discrete_quantile(
       const Dist& dist,
       const typename Dist::value_type& p,
@@ -516,7 +527,7 @@ inline typename Dist::value_type
       const typename Dist::value_type& multiplier,
       const typename Dist::value_type& adder,
       const policies::discrete_quantile<policies::integer_round_up>&,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    BOOST_MATH_STD_USING
    typename Dist::value_type pp = c ? 1 - p : p;
@@ -534,7 +545,7 @@ inline typename Dist::value_type
 }

 template <class Dist>
-inline typename Dist::value_type
+BOOST_MATH_GPU_ENABLED inline typename Dist::value_type
    inverse_discrete_quantile(
       const Dist& dist,
       const typename Dist::value_type& p,
@@ -543,7 +554,7 @@ inline typename Dist::value_type
       const typename Dist::value_type& multiplier,
       const typename Dist::value_type& adder,
       const policies::discrete_quantile<policies::integer_round_nearest>&,
-      std::uintmax_t& max_iter)
+      boost::math::uintmax_t& max_iter)
 {
    typedef typename Dist::value_type value_type;
    BOOST_MATH_STD_USING
diff --git a/include/boost/math/distributions/exponential.hpp b/include/boost/math/distributions/exponential.hpp
index 164e01f205..9d45ac4933 100644
--- a/include/boost/math/distributions/exponential.hpp
+++ b/include/boost/math/distributions/exponential.hpp
@@ -1,4 +1,5 @@
 // Copyright John Maddock 2006.
+// Copyright Matt Borland 2024
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -6,12 +7,16 @@
 #ifndef BOOST_STATS_EXPONENTIAL_HPP
 #define BOOST_STATS_EXPONENTIAL_HPP

-#include
+#include
+#include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
+#include

 #ifdef _MSC_VER
 # pragma warning(push)
@@ -19,8 +24,11 @@
 # pragma warning(disable: 4702) // unreachable code (return after domain_error throw).
 #endif

+#ifndef BOOST_MATH_HAS_NVRTC
+#include
 #include
 #include
+#endif

 namespace boost{ namespace math{

@@ -29,7 +37,7 @@ namespace detail{
 // Error check:
 //
 template <class RealType, class Policy>
-inline bool verify_lambda(const char* function, RealType l, RealType* presult, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool verify_lambda(const char* function, RealType l, RealType* presult, const Policy& pol)
 {
    if((l <= 0) || !(boost::math::isfinite)(l))
    {
@@ -42,7 +50,7 @@ inline bool verify_lambda(const char* function, RealType l, RealType* presult, c
 }

 template <class RealType, class Policy>
-inline bool verify_exp_x(const char* function, RealType x, RealType* presult, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool verify_exp_x(const char* function, RealType x, RealType* presult, const Policy& pol)
 {
    if((x < 0) || (boost::math::isnan)(x))
    {
@@ -63,14 +71,14 @@ class exponential_distribution
    using value_type = RealType;
    using policy_type = Policy;

-   explicit exponential_distribution(RealType l_lambda = 1)
+   BOOST_MATH_GPU_ENABLED explicit exponential_distribution(RealType l_lambda = 1)
      : m_lambda(l_lambda)
   {
      RealType err;
      detail::verify_lambda("boost::math::exponential_distribution<%1%>::exponential_distribution", l_lambda, &err, Policy());
   } // exponential_distribution

-   RealType lambda()const { return m_lambda; }
+   BOOST_MATH_GPU_ENABLED RealType lambda()const { return m_lambda; }

 private:
    RealType m_lambda;
@@ -84,35 +92,35 @@ exponential_distribution(RealType)->exponential_distribution

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> range(const exponential_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const exponential_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
-  if (std::numeric_limits<RealType>::has_infinity)
+  BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
  {
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), std::numeric_limits<RealType>::infinity()); // 0 to + infinity.
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), boost::math::numeric_limits<RealType>::infinity()); // 0 to + infinity.
  }
  else
  {
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // 0 to + max
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // 0 to + max
  }
 }

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const exponential_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const exponential_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
   // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
   using boost::math::tools::max_value;
   using boost::math::tools::min_value;
-  return std::pair<RealType, RealType>(min_value<RealType>(), max_value<RealType>());
+  return boost::math::pair<RealType, RealType>(min_value<RealType>(), max_value<RealType>()); // min_value<RealType>() to avoid a discontinuity at x = 0.
 }
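For reference, the functions that follow implement pdf(x) = lambda * exp(-lambda * x) and cdf(x) = 1 - exp(-lambda * x) for x >= 0. Two quick spot checks:

    #include <boost/math/distributions/exponential.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::exponential_distribution<double> dist(3.0);
        assert(std::abs(pdf(dist, 0.0) - 3.0) < 1e-12);                    // pdf(0) = lambda
        assert(std::abs(cdf(dist, 1.0) - (1.0 - std::exp(-3.0))) < 1e-12); // 1 - exp(-lambda*x)
    }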
 template <class RealType, class Policy>
-inline RealType pdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::pdf(const exponential_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const exponential_distribution<%1%>&, %1%)";

    RealType lambda = dist.lambda();
    RealType result = 0;
@@ -128,14 +136,14 @@ inline RealType pdf(const exponential_distribution<RealType, Policy>& dist, cons
 } // pdf

 template <class RealType, class Policy>
-inline RealType logpdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::logpdf(const exponential_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logpdf(const exponential_distribution<%1%>&, %1%)";

    RealType lambda = dist.lambda();
-   RealType result = -std::numeric_limits<RealType>::infinity();
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();
    if(0 == detail::verify_lambda(function, lambda, &result, Policy()))
       return result;
    if(0 == detail::verify_exp_x(function, x, &result, Policy()))
@@ -146,11 +154,11 @@ inline RealType logpdf(const exponential_distribution<RealType, Policy>& dist, c
 } // logpdf

 template <class RealType, class Policy>
-inline RealType cdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::cdf(const exponential_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const exponential_distribution<%1%>&, %1%)";

    RealType result = 0;
    RealType lambda = dist.lambda();
@@ -164,11 +172,11 @@ inline RealType cdf(const exponential_distribution<RealType, Policy>& dist, cons
 } // cdf

 template <class RealType, class Policy>
-inline RealType logcdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logcdf(const exponential_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::logcdf(const exponential_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logcdf(const exponential_distribution<%1%>&, %1%)";

    RealType result = 0;
    RealType lambda = dist.lambda();
@@ -182,11 +190,11 @@ inline RealType logcdf(const exponential_distribution<RealType, Policy>& dist, c
 } // cdf

 template <class RealType, class Policy>
-inline RealType quantile(const exponential_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const exponential_distribution<RealType, Policy>& dist, const RealType& p)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::quantile(const exponential_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const exponential_distribution<%1%>&, %1%)";

    RealType result = 0;
    RealType lambda = dist.lambda();
@@ -205,11 +213,11 @@ inline RealType quantile(const exponential_distribution<RealType, Policy>& dist,
 } // quantile
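Inverting the CDF gives Q(p) = -log(1 - p) / lambda, and the complemented quantile below computes Q from the upper-tail probability q directly as -log(q) / lambda, avoiding the formation of 1 - q. A quick consistency check:

    #include <boost/math/distributions/exponential.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::exponential_distribution<double> dist(2.0);
        const double p = 0.3;
        // Q(p) = -log1p(-p) / lambda:
        assert(std::abs(quantile(dist, p) + std::log1p(-p) / 2.0) < 1e-12);
        // Complemented form agrees with the direct form:
        assert(std::abs(quantile(complement(dist, 1.0 - p)) - quantile(dist, p)) < 1e-9);
    }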
RealType result = 0; RealType lambda = c.dist.lambda(); @@ -226,11 +234,11 @@ inline RealType cdf(const complemented2_type -inline RealType logcdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::logcdf(const exponential_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::logcdf(const exponential_distribution<%1%>&, %1%)"; RealType result = 0; RealType lambda = c.dist.lambda(); @@ -247,11 +255,11 @@ inline RealType logcdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const exponential_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const exponential_distribution<%1%>&, %1%)"; RealType result = 0; RealType lambda = c.dist.lambda(); @@ -272,7 +280,7 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const exponential_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const exponential_distribution& dist) { RealType result = 0; RealType lambda = dist.lambda(); @@ -282,7 +290,7 @@ inline RealType mean(const exponential_distribution& dist) } template -inline RealType standard_deviation(const exponential_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType standard_deviation(const exponential_distribution& dist) { RealType result = 0; RealType lambda = dist.lambda(); @@ -292,38 +300,38 @@ inline RealType standard_deviation(const exponential_distribution -inline RealType mode(const exponential_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType mode(const exponential_distribution& /*dist*/) { return 0; } template -inline RealType median(const exponential_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType median(const exponential_distribution& dist) { using boost::math::constants::ln_two; return ln_two() / dist.lambda(); // ln(2) / lambda } template -inline RealType skewness(const exponential_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const exponential_distribution& /*dist*/) { return 2; } template -inline RealType kurtosis(const exponential_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const exponential_distribution& /*dist*/) { return 9; } template -inline RealType kurtosis_excess(const exponential_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const exponential_distribution& /*dist*/) { return 6; } template -inline RealType entropy(const exponential_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType entropy(const exponential_distribution& dist) { using std::log; return 1 - log(dist.lambda()); diff --git a/include/boost/math/distributions/extreme_value.hpp b/include/boost/math/distributions/extreme_value.hpp index 1bde2743c0..73454d29d4 100644 --- a/include/boost/math/distributions/extreme_value.hpp +++ b/include/boost/math/distributions/extreme_value.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2006. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
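For reference, a minimal host-side sketch of the exponential_distribution API touched by the hunks above; this is not part of the patch, and the parameter values are arbitrary:

#include <boost/math/distributions/exponential.hpp>
#include <iostream>

int main()
{
    boost::math::exponential_distribution<double> dist(0.5); // rate lambda = 0.5

    std::cout << boost::math::pdf(dist, 1.0) << '\n';      // 0.5 * exp(-0.5)
    std::cout << boost::math::cdf(dist, 1.0) << '\n';      // 1 - exp(-0.5)
    std::cout << boost::math::quantile(dist, 0.5) << '\n'; // median = ln(2) / lambda
    std::cout << boost::math::mean(dist) << '\n';          // 1 / lambda = 2
}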
diff --git a/include/boost/math/distributions/extreme_value.hpp b/include/boost/math/distributions/extreme_value.hpp
index 1bde2743c0..73454d29d4 100644
--- a/include/boost/math/distributions/extreme_value.hpp
+++ b/include/boost/math/distributions/extreme_value.hpp
@@ -1,4 +1,5 @@
 // Copyright John Maddock 2006.
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -6,12 +7,17 @@
 #ifndef BOOST_STATS_EXTREME_VALUE_HPP
 #define BOOST_STATS_EXTREME_VALUE_HPP

-#include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
+#include

 //
 // This is the maximum extreme value distribution, see
@@ -20,8 +26,11 @@
 // Also known as a Fisher-Tippett distribution, a log-Weibull
 // distribution or a Gumbel distribution.

+#ifndef BOOST_MATH_HAS_NVRTC
+#include
 #include
 #include
+#endif

 #ifdef _MSC_VER
 # pragma warning(push)
@@ -35,7 +44,7 @@ namespace detail{
 // Error check:
 //
 template <class RealType, class Policy>
-inline bool verify_scale_b(const char* function, RealType b, RealType* presult, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline bool verify_scale_b(const char* function, RealType b, RealType* presult, const Policy& pol)
 {
    if((b <= 0) || !(boost::math::isfinite)(b))
    {
@@ -56,7 +65,7 @@ class extreme_value_distribution
    using value_type = RealType;
    using policy_type = Policy;

-   explicit extreme_value_distribution(RealType a = 0, RealType b = 1)
+   BOOST_MATH_GPU_ENABLED explicit extreme_value_distribution(RealType a = 0, RealType b = 1)
       : m_a(a), m_b(b)
    {
       RealType err;
@@ -64,8 +73,8 @@ class extreme_value_distribution
       detail::check_finite("boost::math::extreme_value_distribution<%1%>::extreme_value_distribution", a, &err, Policy());
    } // extreme_value_distribution

-   RealType location()const { return m_a; }
-   RealType scale()const { return m_b; }
+   BOOST_MATH_GPU_ENABLED RealType location()const { return m_a; }
+   BOOST_MATH_GPU_ENABLED RealType scale()const { return m_b; }

 private:
    RealType m_a;
@@ -82,28 +91,28 @@ extreme_value_distribution(RealType,RealType)->extreme_value_distribution<typename boost::math::tools::promote_args<RealType>::type>;

-inline std::pair<RealType, RealType> range(const extreme_value_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const extreme_value_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(
-      std::numeric_limits<RealType>::has_infinity ? -std::numeric_limits<RealType>::infinity() : -max_value<RealType>(),
-      std::numeric_limits<RealType>::has_infinity ? std::numeric_limits<RealType>::infinity() : max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(
+      boost::math::numeric_limits<RealType>::has_infinity ? -boost::math::numeric_limits<RealType>::infinity() : -max_value<RealType>(),
+      boost::math::numeric_limits<RealType>::has_infinity ? boost::math::numeric_limits<RealType>::infinity() : max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const extreme_value_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const extreme_value_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline RealType pdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::pdf(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const extreme_value_distribution<%1%>&, %1%)";

    RealType a = dist.location();
    RealType b = dist.scale();
@@ -124,15 +133,15 @@ inline RealType pdf(const extreme_value_distribution& dist, co
 } // pdf

 template <class RealType, class Policy>
-inline RealType logpdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::logpdf(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logpdf(const extreme_value_distribution<%1%>&, %1%)";

    RealType a = dist.location();
    RealType b = dist.scale();
-   RealType result = -std::numeric_limits<RealType>::infinity();
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();

    if(0 == detail::verify_scale_b(function, b, &result, Policy()))
       return result;
    if(0 == detail::check_finite(function, a, &result, Policy()))
@@ -149,11 +158,11 @@ inline RealType logpdf(const extreme_value_distribution& dist,
 } // logpdf

 template <class RealType, class Policy>
-inline RealType cdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::cdf(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const extreme_value_distribution<%1%>&, %1%)";

    if((boost::math::isinf)(x))
       return x < 0 ? 0.0f : 1.0f;
@@ -175,11 +184,11 @@ inline RealType cdf(const extreme_value_distribution& dist, co
 } // cdf

 template <class RealType, class Policy>
-inline RealType logcdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logcdf(const extreme_value_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::logcdf(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logcdf(const extreme_value_distribution<%1%>&, %1%)";

    if((boost::math::isinf)(x))
       return x < 0 ? 0.0f : 1.0f;
@@ -201,11 +210,11 @@ inline RealType logcdf(const extreme_value_distribution& dist,
 } // logcdf

 template <class RealType, class Policy>
-RealType quantile(const extreme_value_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED RealType quantile(const extreme_value_distribution<RealType, Policy>& dist, const RealType& p)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::quantile(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const extreme_value_distribution<%1%>&, %1%)";

    RealType a = dist.location();
    RealType b = dist.scale();
@@ -228,11 +237,11 @@ RealType quantile(const extreme_value_distribution& dist, cons
 } // quantile

 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<extreme_value_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<extreme_value_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::cdf(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const extreme_value_distribution<%1%>&, %1%)";

    if((boost::math::isinf)(c.param))
       return c.param < 0 ? 1.0f : 0.0f;
@@ -252,11 +261,11 @@ inline RealType cdf(const complemented2_type

-inline RealType logcdf(const complemented2_type<extreme_value_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type<extreme_value_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::logcdf(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logcdf(const extreme_value_distribution<%1%>&, %1%)";

    if((boost::math::isinf)(c.param))
       return c.param < 0 ? 1.0f : 0.0f;
@@ -276,11 +285,11 @@ inline RealType logcdf(const complemented2_type

-RealType quantile(const complemented2_type<extreme_value_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED RealType quantile(const complemented2_type<extreme_value_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::quantile(const extreme_value_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const extreme_value_distribution<%1%>&, %1%)";

    RealType a = c.dist.location();
    RealType b = c.dist.scale();
@@ -304,7 +313,7 @@ RealType quantile(const complemented2_type

-inline RealType mean(const extreme_value_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const extreme_value_distribution<RealType, Policy>& dist)
 {
    RealType a = dist.location();
    RealType b = dist.scale();
@@ -317,7 +326,7 @@ inline RealType mean(const extreme_value_distribution& dist)
 }

 template <class RealType, class Policy>
-inline RealType standard_deviation(const extreme_value_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType standard_deviation(const extreme_value_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions.
@@ -331,20 +340,20 @@ inline RealType standard_deviation(const extreme_value_distribution

-inline RealType mode(const extreme_value_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const extreme_value_distribution<RealType, Policy>& dist)
 {
    return dist.location();
 }

 template <class RealType, class Policy>
-inline RealType median(const extreme_value_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType median(const extreme_value_distribution<RealType, Policy>& dist)
 {
   using constants::ln_ln_two;
   return dist.location() - dist.scale() * ln_ln_two<RealType>();
 }

 template <class RealType, class Policy>
-inline RealType skewness(const extreme_value_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const extreme_value_distribution<RealType, Policy>& /*dist*/)
 {
   //
   // This is 12 * sqrt(6) * zeta(3) / pi^3:
@@ -354,14 +363,14 @@ inline RealType skewness(const extreme_value_distribution& /*d
 }

 template <class RealType, class Policy>
-inline RealType kurtosis(const extreme_value_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const extreme_value_distribution<RealType, Policy>& /*dist*/)
 {
   // See http://mathworld.wolfram.com/ExtremeValueDistribution.html
   return RealType(27) / 5;
 }

 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const extreme_value_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const extreme_value_distribution<RealType, Policy>& /*dist*/)
 {
   // See http://mathworld.wolfram.com/ExtremeValueDistribution.html
   return RealType(12) / 5;
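As a sanity check on the median() hunk above, a small sketch (not part of the patch; a and b are arbitrary) comparing the accessor against the closed form a - b * ln(ln 2):

#include <boost/math/distributions/extreme_value.hpp>
#include <cmath>
#include <iostream>

int main()
{
    const double a = 1.5, b = 2.0; // location, scale
    boost::math::extreme_value_distribution<double> dist(a, b);

    std::cout << boost::math::median(dist) << '\n';
    std::cout << a - b * std::log(std::log(2.0)) << '\n'; // same value
}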
diff --git a/include/boost/math/distributions/fisher_f.hpp b/include/boost/math/distributions/fisher_f.hpp
index e22cdf50ae..56b288d88e 100644
--- a/include/boost/math/distributions/fisher_f.hpp
+++ b/include/boost/math/distributions/fisher_f.hpp
@@ -1,5 +1,5 @@
 // Copyright John Maddock 2006.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -8,14 +8,15 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_FISHER_F_HPP
 #define BOOST_MATH_DISTRIBUTIONS_FISHER_F_HPP

+#include
+#include
+#include
 #include
 #include // for incomplete beta.
 #include // complements
 #include // error checks
 #include

-#include
-
 namespace boost{ namespace math{

 template <class RealType = double, class Policy = policies::policy<> >
@@ -25,9 +26,9 @@ class fisher_f_distribution
    typedef RealType value_type;
    typedef Policy policy_type;

-   fisher_f_distribution(const RealType& i, const RealType& j) : m_df1(i), m_df2(j)
+   BOOST_MATH_GPU_ENABLED fisher_f_distribution(const RealType& i, const RealType& j) : m_df1(i), m_df2(j)
    {
-      static const char* function = "fisher_f_distribution<%1%>::fisher_f_distribution";
+      constexpr auto function = "fisher_f_distribution<%1%>::fisher_f_distribution";
      RealType result;
      detail::check_df(
         function, m_df1, &result, Policy());
@@ -35,11 +36,11 @@ class fisher_f_distribution
         function, m_df2, &result, Policy());
    } // fisher_f_distribution

-   RealType degrees_of_freedom1()const
+   BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const
    {
       return m_df1;
    }
-   RealType degrees_of_freedom2()const
+   BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const
    {
       return m_df2;
    }
@@ -60,29 +61,29 @@ fisher_f_distribution(RealType,RealType)->fisher_f_distribution<typename boost::math::tools::promote_args<RealType>::type>;

-inline const std::pair<RealType, RealType> range(const fisher_f_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const fisher_f_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> support(const fisher_f_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const fisher_f_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
 }

 template <class RealType, class Policy>
-RealType pdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED RealType pdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
    RealType error_result = 0;
-   static const char* function = "boost::math::pdf(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::pdf(fisher_f_distribution<%1%> const&, %1%)";
    if(false == (detail::check_df(
         function, df1, &error_result, Policy())
         && detail::check_df(
@@ -132,9 +133,9 @@ RealType pdf(const fisher_f_distribution& dist, const RealType
 } // pdf

 template <class RealType, class Policy>
-inline RealType cdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const fisher_f_distribution<RealType, Policy>& dist, const RealType& x)
 {
-   static const char* function = "boost::math::cdf(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::cdf(fisher_f_distribution<%1%> const&, %1%)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -167,9 +168,9 @@ inline RealType cdf(const fisher_f_distribution& dist, const R
 } // cdf

 template <class RealType, class Policy>
-inline RealType quantile(const fisher_f_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const fisher_f_distribution<RealType, Policy>& dist, const RealType& p)
 {
-   static const char* function = "boost::math::quantile(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::quantile(fisher_f_distribution<%1%> const&, %1%)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -192,9 +193,9 @@ inline RealType quantile(const fisher_f_distribution& dist, co
 } // quantile

 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<fisher_f_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<fisher_f_distribution<RealType, Policy>, RealType>& c)
 {
-   static const char* function = "boost::math::cdf(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::cdf(fisher_f_distribution<%1%> const&, %1%)";
    RealType df1 = c.dist.degrees_of_freedom1();
    RealType df2 = c.dist.degrees_of_freedom2();
    RealType x = c.param;
@@ -228,9 +229,9 @@ inline RealType cdf(const complemented2_type

-inline RealType quantile(const complemented2_type<fisher_f_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<fisher_f_distribution<RealType, Policy>, RealType>& c)
 {
-   static const char* function = "boost::math::quantile(fisher_f_distribution<%1%> const&, %1%)";
+   constexpr auto function = "boost::math::quantile(fisher_f_distribution<%1%> const&, %1%)";
    RealType df1 = c.dist.degrees_of_freedom1();
    RealType df2 = c.dist.degrees_of_freedom2();
    RealType p = c.param;
@@ -252,9 +253,9 @@ inline RealType quantile(const complemented2_type

-inline RealType mean(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const fisher_f_distribution<RealType, Policy>& dist)
 { // Mean of F distribution = v.
-   static const char* function = "boost::math::mean(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::mean(fisher_f_distribution<%1%> const&)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -273,9 +274,9 @@ inline RealType mean(const fisher_f_distribution& dist)
 } // mean

 template <class RealType, class Policy>
-inline RealType variance(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const fisher_f_distribution<RealType, Policy>& dist)
 { // Variance of F distribution.
-   static const char* function = "boost::math::variance(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::variance(fisher_f_distribution<%1%> const&)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -294,9 +295,9 @@ inline RealType variance(const fisher_f_distribution& dist)
 } // variance

 template <class RealType, class Policy>
-inline RealType mode(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const fisher_f_distribution<RealType, Policy>& dist)
 {
-   static const char* function = "boost::math::mode(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::mode(fisher_f_distribution<%1%> const&)";
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
    // Error check:
@@ -317,15 +318,15 @@ inline RealType mode(const fisher_f_distribution& dist)

 //template <class RealType, class Policy>
 //inline RealType median(const fisher_f_distribution<RealType, Policy>& dist)
 //{ // Median of Fisher F distribution is not defined.
-//  return tools::domain_error<RealType>(BOOST_CURRENT_FUNCTION, "Median is not implemented, result is %1%!", std::numeric_limits<RealType>::quiet_NaN());
+//  return tools::domain_error<RealType>(BOOST_CURRENT_FUNCTION, "Median is not implemented, result is %1%!", boost::math::numeric_limits<RealType>::quiet_NaN());
 // } // median
 // Now implemented via quantile(half) in derived accessors.

 template <class RealType, class Policy>
-inline RealType skewness(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const fisher_f_distribution<RealType, Policy>& dist)
 {
-   static const char* function = "boost::math::skewness(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::skewness(fisher_f_distribution<%1%> const&)";
    BOOST_MATH_STD_USING // ADL of std names
    // See http://mathworld.wolfram.com/F-Distribution.html
    RealType df1 = dist.degrees_of_freedom1();
@@ -346,18 +347,18 @@ inline RealType skewness(const fisher_f_distribution& dist)
 }

 template <class RealType, class Policy>
-RealType kurtosis_excess(const fisher_f_distribution<RealType, Policy>& dist);
+BOOST_MATH_GPU_ENABLED RealType kurtosis_excess(const fisher_f_distribution<RealType, Policy>& dist);

 template <class RealType, class Policy>
-inline RealType kurtosis(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const fisher_f_distribution<RealType, Policy>& dist)
 {
    return 3 + kurtosis_excess(dist);
 }

 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const fisher_f_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const fisher_f_distribution<RealType, Policy>& dist)
 {
-   static const char* function = "boost::math::kurtosis_excess(fisher_f_distribution<%1%> const&)";
+   constexpr auto function = "boost::math::kurtosis_excess(fisher_f_distribution<%1%> const&)";
    // See http://mathworld.wolfram.com/F-Distribution.html
    RealType df1 = dist.degrees_of_freedom1();
    RealType df2 = dist.degrees_of_freedom2();
diff --git a/include/boost/math/distributions/fwd.hpp b/include/boost/math/distributions/fwd.hpp
index a3c1a41df5..ccb3c0cd1b 100644
--- a/include/boost/math/distributions/fwd.hpp
+++ b/include/boost/math/distributions/fwd.hpp
@@ -66,6 +66,18 @@ class inverse_gaussian_distribution;
 template <class RealType, class Policy>
 class kolmogorov_smirnov_distribution;

+template <class RealType, class Policy>
+class landau_distribution;
+
+template <class RealType, class Policy>
+class mapairy_distribution;
+
+template <class RealType, class Policy>
+class holtsmark_distribution;
+
+template <class RealType, class Policy>
+class saspoint5_distribution;
+
 template <class RealType, class Policy>
 class laplace_distribution;

@@ -136,6 +148,10 @@ class weibull_distribution;
    typedef boost::math::inverse_chi_squared_distribution<Type, Policy> inverse_chi_squared;\
    typedef boost::math::inverse_gaussian_distribution<Type, Policy> inverse_gaussian;\
    typedef boost::math::inverse_gamma_distribution<Type, Policy> inverse_gamma;\
+   typedef boost::math::landau_distribution<Type, Policy> landau;\
+   typedef boost::math::mapairy_distribution<Type, Policy> mapairy;\
+   typedef boost::math::holtsmark_distribution<Type, Policy> holtsmark;\
+   typedef boost::math::saspoint5_distribution<Type, Policy> saspoint5;\
    typedef boost::math::laplace_distribution<Type, Policy> laplace;\
    typedef boost::math::logistic_distribution<Type, Policy> logistic;\
    typedef boost::math::lognormal_distribution<Type, Policy> lognormal;\
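For reference, a minimal sketch of the fisher_f_distribution functions annotated above (not part of the patch; the degrees of freedom are chosen arbitrarily), showing that quantile() and cdf() invert each other:

#include <boost/math/distributions/fisher_f.hpp>
#include <iostream>

int main()
{
    boost::math::fisher_f_distribution<double> dist(5.0, 10.0); // df1, df2

    double x = boost::math::quantile(dist, 0.95); // upper 5% critical value
    std::cout << x << '\n';
    std::cout << boost::math::cdf(dist, x) << '\n'; // recovers ~0.95
}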
diff --git a/include/boost/math/distributions/gamma.hpp b/include/boost/math/distributions/gamma.hpp
index 28b7c55b0b..5176f906d8 100644
--- a/include/boost/math/distributions/gamma.hpp
+++ b/include/boost/math/distributions/gamma.hpp
@@ -1,4 +1,5 @@
 // Copyright John Maddock 2006.
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,22 +11,22 @@
 // http://mathworld.wolfram.com/GammaDistribution.html
 // http://en.wikipedia.org/wiki/Gamma_distribution

+#include
+#include
+#include
 #include
 #include
 #include
 #include
 #include

-#include
-#include
-
 namespace boost{ namespace math
 {
 namespace detail
 {

 template <class RealType, class Policy>
-inline bool check_gamma_shape(
+BOOST_MATH_GPU_ENABLED inline bool check_gamma_shape(
       const char* function,
       RealType shape,
       RealType* result, const Policy& pol)
@@ -41,7 +42,7 @@ inline bool check_gamma_shape(
 }

 template <class RealType, class Policy>
-inline bool check_gamma_x(
+BOOST_MATH_GPU_ENABLED inline bool check_gamma_x(
       const char* function,
       RealType const& x,
       RealType* result, const Policy& pol)
@@ -57,7 +58,7 @@ inline bool check_gamma_x(
 }

 template <class RealType, class Policy>
-inline bool check_gamma(
+BOOST_MATH_GPU_ENABLED inline bool check_gamma(
       const char* function,
       RealType scale,
       RealType shape,
@@ -75,19 +76,19 @@ class gamma_distribution
    using value_type = RealType;
    using policy_type = Policy;

-   explicit gamma_distribution(RealType l_shape, RealType l_scale = 1)
+   BOOST_MATH_GPU_ENABLED explicit gamma_distribution(RealType l_shape, RealType l_scale = 1)
       : m_shape(l_shape), m_scale(l_scale)
    {
       RealType result;
       detail::check_gamma("boost::math::gamma_distribution<%1%>::gamma_distribution", l_scale, l_shape, &result, Policy());
    }

-   RealType shape()const
+   BOOST_MATH_GPU_ENABLED RealType shape()const
    {
       return m_shape;
    }

-   RealType scale()const
+   BOOST_MATH_GPU_ENABLED RealType scale()const
    {
       return m_scale;
    }
@@ -109,27 +110,27 @@ gamma_distribution(RealType,RealType)->gamma_distribution<typename boost::math::tools::promote_args<RealType>::type>;

-inline std::pair<RealType, RealType> range(const gamma_distribution<RealType, Policy>& /* dist */)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const gamma_distribution<RealType, Policy>& /* dist */)
 { // Range of permissible values for random variable x.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const gamma_distribution<RealType, Policy>& /* dist */)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const gamma_distribution<RealType, Policy>& /* dist */)
 { // Range of supported values for random variable x.
    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
    using boost::math::tools::max_value;
    using boost::math::tools::min_value;
-   return std::pair<RealType, RealType>(min_value<RealType>(), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(min_value<RealType>(), max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline RealType pdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::pdf(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const gamma_distribution<%1%>&, %1%)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -149,17 +150,17 @@ inline RealType pdf(const gamma_distribution& dist, const Real
 } // pdf

 template <class RealType, class Policy>
-inline RealType logpdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
    using boost::math::lgamma;

-   static const char* function = "boost::math::logpdf(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logpdf(const gamma_distribution<%1%>&, %1%)";

    RealType k = dist.shape();
    RealType theta = dist.scale();

-   RealType result = -std::numeric_limits<RealType>::infinity();
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();
    if(false == detail::check_gamma(function, theta, k, &result, Policy()))
       return result;
    if(false == detail::check_gamma_x(function, x, &result, Policy()))
@@ -167,7 +168,7 @@ inline RealType logpdf(const gamma_distribution& dist, const R

    if(x == 0)
    {
-      return std::numeric_limits<RealType>::quiet_NaN();
+      return boost::math::numeric_limits<RealType>::quiet_NaN();
    }

    result = -k*log(theta) + (k-1)*log(x) - lgamma(k) - (x/theta);
@@ -176,11 +177,11 @@ inline RealType logpdf(const gamma_distribution& dist, const R
 } // logpdf

 template <class RealType, class Policy>
-inline RealType cdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const gamma_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::cdf(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const gamma_distribution<%1%>&, %1%)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -196,11 +197,11 @@ inline RealType cdf(const gamma_distribution& dist, const Real
 } // cdf

 template <class RealType, class Policy>
-inline RealType quantile(const gamma_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const gamma_distribution<RealType, Policy>& dist, const RealType& p)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -220,11 +221,11 @@ inline RealType quantile(const gamma_distribution& dist, const
 }

 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<gamma_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<gamma_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";

    RealType shape = c.dist.shape();
    RealType scale = c.dist.scale();
@@ -241,11 +242,11 @@ inline RealType cdf(const complemented2_type
 }

 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<gamma_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<gamma_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)";

    RealType shape = c.dist.shape();
    RealType scale = c.dist.scale();
@@ -266,11 +267,11 @@ inline RealType quantile(const complemented2_type
 }

 template <class RealType, class Policy>
-inline RealType mean(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::mean(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::mean(const gamma_distribution<%1%>&)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -284,11 +285,11 @@ inline RealType mean(const gamma_distribution& dist)
 }

 template <class RealType, class Policy>
-inline RealType variance(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::variance(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::variance(const gamma_distribution<%1%>&)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -302,11 +303,11 @@ inline RealType variance(const gamma_distribution& dist)
 }

 template <class RealType, class Policy>
-inline RealType mode(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::mode(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::mode(const gamma_distribution<%1%>&)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -331,11 +332,11 @@ inline RealType mode(const gamma_distribution& dist)
 //}

 template <class RealType, class Policy>
-inline RealType skewness(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::skewness(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::skewness(const gamma_distribution<%1%>&)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -349,11 +350,11 @@ inline RealType skewness(const gamma_distribution& dist)
 }

 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const gamma_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

-   static const char* function = "boost::math::kurtosis_excess(const gamma_distribution<%1%>&)";
+   constexpr auto function = "boost::math::kurtosis_excess(const gamma_distribution<%1%>&)";

    RealType shape = dist.shape();
    RealType scale = dist.scale();
@@ -367,18 +368,19 @@ inline RealType kurtosis_excess(const gamma_distribution& dist
 }

 template <class RealType, class Policy>
-inline RealType kurtosis(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const gamma_distribution<RealType, Policy>& dist)
 {
    return kurtosis_excess(dist) + 3;
 }

 template <class RealType, class Policy>
-inline RealType entropy(const gamma_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const gamma_distribution<RealType, Policy>& dist)
 {
+   BOOST_MATH_STD_USING
+
    RealType k = dist.shape();
    RealType theta = dist.scale();
-   using std::log;
-   return k + log(theta) + lgamma(k) + (1-k)*digamma(k);
+   return k + log(theta) + boost::math::lgamma(k) + (1-k)*digamma(k);
 }

 } // namespace math
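The entropy() hunk above switches to boost::math::lgamma; a small sketch (not part of the patch; k and theta are arbitrary) checking the accessor against the closed form k + ln(theta) + ln Gamma(k) + (1 - k) * psi(k):

#include <boost/math/distributions/gamma.hpp>
#include <boost/math/special_functions/digamma.hpp>
#include <cmath>
#include <iostream>

int main()
{
    const double k = 2.0, theta = 3.0; // shape, scale
    boost::math::gamma_distribution<double> dist(k, theta);

    std::cout << boost::math::entropy(dist) << '\n';
    std::cout << k + std::log(theta) + std::lgamma(k)
                 + (1 - k) * boost::math::digamma(k) << '\n'; // same value
}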
diff --git a/include/boost/math/distributions/geometric.hpp b/include/boost/math/distributions/geometric.hpp
index 7c511ef2db..0a7b383c24 100644
--- a/include/boost/math/distributions/geometric.hpp
+++ b/include/boost/math/distributions/geometric.hpp
@@ -36,6 +36,9 @@
 #ifndef BOOST_MATH_SPECIAL_GEOMETRIC_HPP
 #define BOOST_MATH_SPECIAL_GEOMETRIC_HPP

+#include
+#include
+#include
 #include
 #include // for ibeta(a, b, x) == Ix(a, b).
 #include // complement.
@@ -45,10 +48,6 @@
 #include
 #include

-#include // using std::numeric_limits;
-#include
-#include
-
 #if defined (BOOST_MSVC)
 #  pragma warning(push)
 // This believed not now necessary, so commented out.
@@ -64,7 +63,7 @@ namespace boost
      // Common error checking routines for geometric distribution function:
      template <class RealType, class Policy>
-      inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol)
      {
        if( !(boost::math::isfinite)(p) || (p < 0) || (p > 1) )
        {
@@ -77,13 +76,13 @@ namespace boost
      }

      template <class RealType, class Policy>
-      inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& p, RealType* result, const Policy& pol)
      {
        return check_success_fraction(function, p, result, pol);
      }

      template <class RealType, class Policy>
-      inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist_and_k(const char* function, const RealType& p, RealType k, RealType* result, const Policy& pol)
      {
        if(check_dist(function, p, result, pol) == false)
        {
@@ -100,7 +99,7 @@ namespace boost
      } // Check_dist_and_k

      template <class RealType, class Policy>
-      inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, RealType p, RealType prob, RealType* result, const Policy& pol)
      {
        if((check_dist(function, p, result, pol) && detail::check_probability(function, prob, result, pol)) == false)
        {
@@ -117,7 +116,7 @@ namespace boost
      typedef RealType value_type;
      typedef Policy policy_type;

-      geometric_distribution(RealType p) : m_p(p)
+      BOOST_MATH_GPU_ENABLED geometric_distribution(RealType p) : m_p(p)
      { // Constructor stores success_fraction p.
        RealType result;
        geometric_detail::check_dist(
@@ -127,22 +126,22 @@ namespace boost
      } // geometric_distribution constructor.

      // Private data getter class member functions.
-      RealType success_fraction() const
+      BOOST_MATH_GPU_ENABLED RealType success_fraction() const
      { // Probability of success as fraction in range 0 to 1.
        return m_p;
      }
-      RealType successes() const
+      BOOST_MATH_GPU_ENABLED RealType successes() const
      { // Total number of successes r = 1 (for compatibility with negative binomial?).
        return 1;
      }

      // Parameter estimation.
      // (These are copies of negative_binomial distribution with successes = 1).
-      static RealType find_lower_bound_on_p(
+      BOOST_MATH_GPU_ENABLED static RealType find_lower_bound_on_p(
        RealType trials,
        RealType alpha) // alpha 0.05 equivalent to 95% for one-sided test.
      {
-        static const char* function = "boost::math::geometric<%1%>::find_lower_bound_on_p";
+        constexpr auto function = "boost::math::geometric<%1%>::find_lower_bound_on_p";
        RealType result = 0;  // of error checks.
        RealType successes = 1;
        RealType failures = trials - successes;
@@ -163,11 +162,11 @@ namespace boost
        return ibeta_inv(successes, failures + 1, alpha, static_cast<RealType*>(nullptr), Policy());
      } // find_lower_bound_on_p

-      static RealType find_upper_bound_on_p(
+      BOOST_MATH_GPU_ENABLED static RealType find_upper_bound_on_p(
        RealType trials,
        RealType alpha) // alpha 0.05 equivalent to 95% for one-sided test.
      {
-        static const char* function = "boost::math::geometric<%1%>::find_upper_bound_on_p";
+        constexpr auto function = "boost::math::geometric<%1%>::find_upper_bound_on_p";
        RealType result = 0;  // of error checks.
        RealType successes = 1;
        RealType failures = trials - successes;
@@ -195,12 +194,12 @@ namespace boost

      // Estimate number of trials :
      // "How many trials do I need to be P% sure of seeing k or fewer failures?"
-      static RealType find_minimum_number_of_trials(
+      BOOST_MATH_GPU_ENABLED static RealType find_minimum_number_of_trials(
        RealType k,     // number of failures (k >= 0).
        RealType p,     // success fraction 0 <= p <= 1.
        RealType alpha) // risk level threshold 0 <= alpha <= 1.
      {
-        static const char* function = "boost::math::geometric<%1%>::find_minimum_number_of_trials";
+        constexpr auto function = "boost::math::geometric<%1%>::find_minimum_number_of_trials";
        // Error checks:
        RealType result = 0;
        if(false == geometric_detail::check_dist_and_k(
@@ -213,12 +212,12 @@ namespace boost
        return result + k;
      } // RealType find_number_of_failures

-      static RealType find_maximum_number_of_trials(
+      BOOST_MATH_GPU_ENABLED static RealType find_maximum_number_of_trials(
        RealType k,     // number of failures (k >= 0).
        RealType p,     // success fraction 0 <= p <= 1.
        RealType alpha) // risk level threshold 0 <= alpha <= 1.
      {
-        static const char* function = "boost::math::geometric<%1%>::find_maximum_number_of_trials";
+        constexpr auto function = "boost::math::geometric<%1%>::find_maximum_number_of_trials";
        // Error checks:
        RealType result = 0;
        if(false == geometric_detail::check_dist_and_k(
@@ -244,22 +243,22 @@ namespace boost
 #endif

      template <class RealType, class Policy>
-      inline const std::pair<RealType, RealType> range(const geometric_distribution<RealType, Policy>& /* dist */)
+      BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const geometric_distribution<RealType, Policy>& /* dist */)
      { // Range of permissible values for random variable k.
         using boost::math::tools::max_value;
-        return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
+        return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
      }

      template <class RealType, class Policy>
-      inline const std::pair<RealType, RealType> support(const geometric_distribution<RealType, Policy>& /* dist */)
+      BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const geometric_distribution<RealType, Policy>& /* dist */)
      { // Range of supported values for random variable k.
        // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
        using boost::math::tools::max_value;
-        return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
+        return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
      }

      template <class RealType, class Policy>
-      inline RealType mean(const geometric_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType mean(const geometric_distribution<RealType, Policy>& dist)
      { // Mean of geometric distribution = (1-p)/p.
        return (1 - dist.success_fraction() ) / dist.success_fraction();
      } // mean
@@ -267,21 +266,21 @@ namespace boost
      // median implemented via quantile(half) in derived accessors.

      template <class RealType, class Policy>
-      inline RealType mode(const geometric_distribution<RealType, Policy>&)
+      BOOST_MATH_GPU_ENABLED inline RealType mode(const geometric_distribution<RealType, Policy>&)
      { // Mode of geometric distribution = zero.
        BOOST_MATH_STD_USING // ADL of std functions.
        return 0;
      } // mode

      template <class RealType, class Policy>
-      inline RealType variance(const geometric_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType variance(const geometric_distribution<RealType, Policy>& dist)
      { // Variance of Binomial distribution = (1-p) / p^2.
        return (1 - dist.success_fraction()) / (dist.success_fraction() * dist.success_fraction());
      } // variance

      template <class RealType, class Policy>
-      inline RealType skewness(const geometric_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType skewness(const geometric_distribution<RealType, Policy>& dist)
      { // skewness of geometric distribution = 2-p / (sqrt(r(1-p))
        BOOST_MATH_STD_USING // ADL of std functions.
        RealType p = dist.success_fraction();
@@ -289,7 +288,7 @@ namespace boost
      } // skewness

      template <class RealType, class Policy>
-      inline RealType kurtosis(const geometric_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const geometric_distribution<RealType, Policy>& dist)
      { // kurtosis of geometric distribution
        // http://en.wikipedia.org/wiki/geometric is kurtosis_excess so add 3
        RealType p = dist.success_fraction();
@@ -297,7 +296,7 @@ namespace boost
      } // kurtosis

      template <class RealType, class Policy>
-      inline RealType kurtosis_excess(const geometric_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const geometric_distribution<RealType, Policy>& dist)
      { // kurtosis excess of geometric distribution
        // http://mathworld.wolfram.com/Kurtosis.html table of kurtosis_excess
        RealType p = dist.success_fraction();
@@ -312,11 +311,11 @@ namespace boost
      // chf of geometric distribution provided by derived accessors.

      template <class RealType, class Policy>
-      inline RealType pdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
+      BOOST_MATH_GPU_ENABLED inline RealType pdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
      { // Probability Density/Mass Function.
        BOOST_FPU_EXCEPTION_GUARD
        BOOST_MATH_STD_USING  // For ADL of math functions.
-        static const char* function = "boost::math::pdf(const geometric_distribution<%1%>&, %1%)";
+        constexpr auto function = "boost::math::pdf(const geometric_distribution<%1%>&, %1%)";

        RealType p = dist.success_fraction();
        RealType result = 0;
@@ -350,9 +349,9 @@ namespace boost
      } // geometric_pdf

      template <class RealType, class Policy>
-      inline RealType cdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
+      BOOST_MATH_GPU_ENABLED inline RealType cdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
      { // Cumulative Distribution Function of geometric.
-        static const char* function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)";
+        constexpr auto function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)";

        // k argument may be integral, signed, or unsigned, or floating point.
        // If necessary, it has already been promoted from an integral type.
@@ -381,12 +380,12 @@ namespace boost
      } // cdf Cumulative Distribution Function geometric.

      template <class RealType, class Policy>
-      inline RealType logcdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
+      BOOST_MATH_GPU_ENABLED inline RealType logcdf(const geometric_distribution<RealType, Policy>& dist, const RealType& k)
      { // Cumulative Distribution Function of geometric.
-        using std::pow;
-        using std::log;
-        using std::exp;
-        static const char* function = "boost::math::logcdf(const geometric_distribution<%1%>&, %1%)";
+        BOOST_MATH_STD_USING
+        constexpr auto function = "boost::math::logcdf(const geometric_distribution<%1%>&, %1%)";

        // k argument may be integral, signed, or unsigned, or floating point.
        // If necessary, it has already been promoted from an integral type.
@@ -399,7 +396,7 @@ namespace boost
            k, &result, Policy()))
        {
-          return -std::numeric_limits<RealType>::infinity();
+          return -boost::math::numeric_limits<RealType>::infinity();
        }
        if(k == 0)
        {
@@ -413,10 +410,10 @@ namespace boost
      } // logcdf Cumulative Distribution Function geometric.

      template <class RealType, class Policy>
-      inline RealType cdf(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
      { // Complemented Cumulative Distribution Function geometric.
        BOOST_MATH_STD_USING
-        static const char* function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)";
+        constexpr auto function = "boost::math::cdf(const geometric_distribution<%1%>&, %1%)";
        // k argument may be integral, signed, or unsigned, or floating point.
        // If necessary, it has already been promoted from an integral type.
        RealType const& k = c.param;
@@ -438,10 +435,10 @@ namespace boost
      } // cdf Complemented Cumulative Distribution Function geometric.

      template <class RealType, class Policy>
-      inline RealType logcdf(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
      { // Complemented Cumulative Distribution Function geometric.
        BOOST_MATH_STD_USING
-        static const char* function = "boost::math::logcdf(const geometric_distribution<%1%>&, %1%)";
+        constexpr auto function = "boost::math::logcdf(const geometric_distribution<%1%>&, %1%)";
        // k argument may be integral, signed, or unsigned, or floating point.
        // If necessary, it has already been promoted from an integral type.
        RealType const& k = c.param;
@@ -455,21 +452,21 @@ namespace boost
            k, &result, Policy()))
        {
-          return -std::numeric_limits<RealType>::infinity();
+          return -boost::math::numeric_limits<RealType>::infinity();
        }

        return boost::math::log1p(-p, Policy()) * (k+1);
      } // logcdf Complemented Cumulative Distribution Function geometric.

      template <class RealType, class Policy>
-      inline RealType quantile(const geometric_distribution<RealType, Policy>& dist, const RealType& x)
+      BOOST_MATH_GPU_ENABLED inline RealType quantile(const geometric_distribution<RealType, Policy>& dist, const RealType& x)
      { // Quantile, percentile/100 or Percent Point geometric function.
        // Return the number of expected failures k for a given probability p.

        // Inverse cumulative Distribution Function or Quantile (percentile / 100) of geometric Probability.
        // k argument may be integral, signed, or unsigned, or floating point.
-        static const char* function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)";
+        constexpr auto function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)";
        BOOST_MATH_STD_USING // ADL of std functions.

        RealType success_fraction = dist.success_fraction();
@@ -513,11 +510,11 @@ namespace boost
      } // RealType quantile(const geometric_distribution dist, p)

      template <class RealType, class Policy>
-      inline RealType quantile(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<geometric_distribution<RealType, Policy>, RealType>& c)
      { // Quantile or Percent Point Binomial function.
        // Return the number of expected failures k for a given
        // complement of the probability Q = 1 - P.
-        static const char* function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)";
+        constexpr auto function = "boost::math::quantile(const geometric_distribution<%1%>&, %1%)";
        BOOST_MATH_STD_USING
        // Error checks:
        RealType x = c.param;
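For reference, a minimal sketch of the geometric_distribution API annotated above (not part of the patch; p, k and alpha are arbitrary), including one of the static planning helpers the hunks touch:

#include <boost/math/distributions/geometric.hpp>
#include <iostream>

int main()
{
    boost::math::geometric_distribution<double> dist(0.25); // success fraction p

    std::cout << boost::math::mean(dist) << '\n';     // (1 - p) / p = 3
    std::cout << boost::math::cdf(dist, 3.0) << '\n'; // 1 - (1 - p)^(k + 1)
    std::cout << boost::math::geometric_distribution<double>::find_minimum_number_of_trials(
        5.0, 0.25, 0.05) << '\n'; // trials needed to be 95% sure of seeing <= 5 failures
}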
static_cast(-9.15509628797205847643e-3), + static_cast(1.82052933284907579374e-2), + static_cast(-2.44157914076021125182e-4), + static_cast(8.40871885414177705035e-4), + static_cast(7.26592615882060553326e-5), + static_cast(-1.87768359214600016641e-6), + static_cast(1.65716961206268668529e-6), + static_cast(-1.73979640146948858436e-7), + static_cast(7.24351142163396584236e-9), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(8.88099527896838765666e-1), + static_cast(6.53896948546877341992e-1), + static_cast(2.96296982585381844864e-1), + static_cast(1.14107585229341489833e-1), + static_cast(3.08914671331207488189e-2), + static_cast(7.03139384769200902107e-3), + static_cast(1.01201814277918577790e-3), + static_cast(1.12200113270398674535e-4), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 8) { + RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 6.5259e-17 + BOOST_MATH_STATIC const RealType P[11] = { + static_cast(1.36729417918039395222e-2), + static_cast(1.19749117683408419115e-2), + static_cast(6.26780921592414207398e-3), + static_cast(1.84846137440857608948e-3), + static_cast(3.39307829797262466829e-4), + static_cast(2.73606960463362090866e-5), + static_cast(-1.14419838471713498717e-7), + static_cast(1.64552336875610576993e-8), + static_cast(-7.95501797873739398143e-10), + static_cast(2.55422885338760255125e-11), + static_cast(-4.12196487201928768038e-13), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(1.61334003864149486454e0), + static_cast(1.28348868912975898501e0), + static_cast(6.36594545291321210154e-1), + static_cast(2.11478937436277242988e-1), + static_cast(4.71550897200311391579e-2), + static_cast(6.64679677197059316835e-3), + static_cast(4.93706832858615742810e-4), + static_cast(9.26919465059204396228e-6), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 3.5084e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(1.90649774685568282390e-3), + static_cast(7.43708409389806210196e-4), + static_cast(9.53777347766128955847e-5), + static_cast(3.79800193823252979170e-6), + static_cast(2.84836656088572745575e-8), + static_cast(-1.22715411241721187620e-10), + static_cast(8.56789906419220801109e-13), + static_cast(-4.17784858891714869163e-15), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(7.29383849235788831455e-1), + static_cast(2.16287201867831015266e-1), + static_cast(3.28789040872705709070e-2), + static_cast(2.64660789801664804789e-3), + static_cast(1.03662724048874906931e-4), + static_cast(1.47658125632566407978e-6), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 1.4660e-19 + BOOST_MATH_STATIC const RealType P[9] = { + static_cast(3.07231582988207590928e-4), + static_cast(5.16108848485823513911e-5), + static_cast(3.05776014220862257678e-6), + static_cast(7.64787444325088143218e-8), + static_cast(7.40426355029090813961e-10), + static_cast(1.57451122102115077046e-12), + static_cast(-2.14505675750572782093e-15), + static_cast(5.11204601013038698192e-18), + static_cast(-9.00826023095223871551e-21), + }; + BOOST_MATH_STATIC const RealType Q[8] = { + static_cast(1.), + static_cast(3.28966789835486457746e-1), 
+    else {
+        RealType t = 1 / sqrt(x * x * x);
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.3004e-17
+        BOOST_MATH_STATIC const RealType P[4] = {
+            static_cast<RealType>(2.99206710301074508455e-1),
+            static_cast<RealType>(-8.62469397757826072306e-1),
+            static_cast<RealType>(1.74661995423629075890e-1),
+            static_cast<RealType>(8.75909164947413479137e-1),
+        };
+        BOOST_MATH_STATIC const RealType Q[3] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(-6.07405848111002255020e0),
+            static_cast<RealType>(1.34068401972703571636e1),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t / x;
+    }
+
+    return result;
+}
+
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 4.5215e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87352751452164445024482162286994868262e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.07622509000285763173795736744991173600e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75004930885780661923539070646503039258e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.72358602484766333657370198137154157310e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80082654994455046054228833198744292689e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.53887200727615005180492399966262970151e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07684195532179300820096260852073763880e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.39151986881253768780523679256708455051e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.31700721746247708002568205696938014069e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.52538425285394123789751606057231671946e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.13997198703138372752313576244312091598e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74788965317036115104204201740144738267e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.18994723428163008965406453309272880204e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.49208308902369087634036371223527932419e-11),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.07053963271862256947338846403373278592e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30146528469038357598785392812229655811e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.22168809220570888957518451361426420755e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.30911708477464424748895247790513118077e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.32037605861909345291211474811347056388e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.37380742268959889784160508321242249326e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.17777859396994816599172003124202701362e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.69357597449425742856874347560067711953e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.22061268498705703002731594804187464212e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.03685918248668999775572498175163352453e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.42037705933347925911510259098903765388e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.13651251802353350402740200231061151003e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.15390928968620849348804301589542546367e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.96186359077726620124148756657971390386e-9),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.3996e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.02038159607840130388931544845552929992e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.85240836242909590376775233472494840074e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.92928437142375928121954427888812334305e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.56075992368354834619445578502239925632e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85410663490566091471288623735720924369e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.09160661432404033681463938555133581443e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60290555290385646856693819798655258098e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24420942563054709904053017769325945705e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.06370233020823161157791461691510091864e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.51562554221298564845071290898761434388e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.77361020844998296791409508640756247324e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.10768937536097342883548728871352580308e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.97810512763454658214572490850146305033e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.77430867682132459087084564268263825239e-11),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.30030169049261634787262795838348954434e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45935676273909940847479638179887855033e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14724239378269259016679286177700667008e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21580123796578745240828564510740594111e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70287348745451818082884807214512422940e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46859813604124308580987785473592196488e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.49627445316021031361394030382456867983e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05157712406194406440213776605199788051e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91541875103990251411297099611180353187e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47960462287955806798879139599079388744e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.80126815763067695392857052825785263211e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04569118116204820761181992270024358122e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.63024381269503801668229632579505279520e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00967434338725770754103109040982001783e-8),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.6834e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.45396231261375200568114750897618690566e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.83107635287140466760500899510899613385e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71690205829238281191309321676655995475e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.95995611963950467634398178757261552497e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.52444689050426648467863527289016233648e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.40423239472181137610649503303203209123e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72181273738390251101985797318639680476e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.11423032981781501087311583401963332916e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37255768388351332508195641748235373885e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.25140171472943043666747084376053803301e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98925617316135247540832898350427842870e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27532592227329144332335468302536835334e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.25846339430429852334026937219420930290e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.17852693845678292024334670662803641322e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60008761860786244203651832067697976835e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.85474213475378978699789357283744252832e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.05561259222780127064607109581719435800e-15),
+        };
+        BOOST_MATH_STATIC const RealType Q[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08902510590064634965634560548380735284e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.60127698266075086782895988567899172787e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.73299227011247478433171171063045855612e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.94019328695445269130845646745771017029e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21478511930928822349285105322914093227e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.42888485420705779382804725954524839381e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36839484685440714657854206969200824442e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77082068469251728028552451884848161629e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.92625563541021144576900067220082880950e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88302521658522279293312672887766072876e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.37703703342287521257351386589629343948e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.32454189932655869016489443530062686013e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.81822848072558151338694737514507945151e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.40176559099032106726456059226930240477e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.55722115663529425797132143276461872035e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.18236697046568703899375072798708359035e-10),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.6207e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[20] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36729417918039395222067998266923903488e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05780369334958736210688756060527042344e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88449456199223796440901487003885388570e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20213624124017393492512893302682417041e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.95009975955570002297453163471062373746e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35668345583965001606910217518443864382e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.69006847702829685253055277085000792826e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08366922884479491780654020783735539561e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.71834368599657597252633517017213868956e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88269472722301903965736220481240654265e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37797139843759131750966129487745639531e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72390971590654495025982276782257590019e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68354503497961090303189233611418754374e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20749461042713568368181066233478264894e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.71167265100639100355339812752823628805e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37497033071709741762372104386727560387e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.08992504249040731356693038222581843266e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.03311745412603363076896897060158476094e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89266184062176002518506060373755160893e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.22157263424086267338486564980223658130e-22),
+        };
+        BOOST_MATH_STATIC const RealType Q[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24254809760594824834854946949546737102e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66740386908805016172202899592418717176e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17175023341071972435947261868288366592e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33939409711833786730168591434519989589e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.58859674176126567295417811572162232222e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66346764121676348703738437519493817401e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.00687534341032230207422557716131339293e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57352381181825892637055619366793541271e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.23955067096868711061473058513398543786e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28279376429637301814743591831507047825e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22380760186302431267562571014519501842e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.21421839279245792393425090284615681867e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.80151544531415207189620615654737831345e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.57177992740786529976179511261318869505e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54223623314672019530719165336863142227e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26447311109866547647645308621478963788e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76514314007336173875469200193103772775e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.63785420481380041892410849615596985103e-13),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.8882e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90649774685568282389553481307707005425e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70151946710788532273869130544473159961e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76188245008605985768921328976193346788e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.94997481586873355765607596415761713534e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83556339450065349619118429405554762845e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.39766178753196196595432796889473826698e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48835240264191055418415753552383932859e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.23205178959384483669515397903609703992e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80665018951397281836428650435128239368e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27113208299726105096854812628329439191e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.75272882929773945317046764560516449105e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.73174017370926101455204470047842394787e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.55548825213165929101134655786361059720e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.79786015549170518239230891794588988732e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73060731998834750292816218696923192789e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.62842837946576938669447109511449827857e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33878078951302606409419167741041897986e-26),
+        };
+        BOOST_MATH_STATIC const RealType Q[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75629880937514507004822969528240262723e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43883005193126748135739157335919076027e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.26826935326347315479579835343751624245e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52263130214924169696993839078084050641e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.34708681216662922818631865761136370252e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19079618273418070513605131981401070622e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68812668867590621701228940772852924670e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81323523265546812020317698573638573275e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46655191174052062382710487986225631851e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.79864553144116347379916608661549264281e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.81866770335021233700248077520029108331e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15408288688082935176022095799735538723e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29421875915133979067465908221270435168e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74564282803894180881025348633912184161e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69782249847887916810010605635064672269e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.85875986197737611300062229945990879767e-18),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.7988e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.07231582988207590928480356376941073734e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35574911514921623999866392865480652576e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.60219401814297026945664630716309317015e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84927222345566515103807882976184811760e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96327408363203008584583124982694689234e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.86684048703029160378252571846517319101e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65469175974819997602752600929172261626e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.21842057555380199566706533446991680612e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.53555106309423641769303386628162522042e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.92686543698369260585325449306538016446e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01838615452860702770059987567879856504e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.65492535746962514730615062374864701860e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53395563720606494853374354984531107080e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.99957357701259203151690416786669242677e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46357124817620384236108395837490629563e-31),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02259092175256156108200465685980768901e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63438230616954606028022008517920766366e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.63880061357592661176130881772975919418e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81911305852397235014131637306820512975e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09690724408294608306577482852270088377e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11275552068434583356476295833517496456e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.24681861037105338446379750828324925566e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16034379416965004687140768474445096709e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23234481703249409689976894391287818596e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93297387560911081670605071704642179017e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50338428974314371000017727660753886621e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27897854868353937080739431205940604582e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.37798740524930029176790562876868493344e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29920082153439260734550295626576101192e-22),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.9688e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.25741312407933720816582583160953651639e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.04434146174674791036848306058526901384e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.68959516304795838166182070164492846877e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78859935261158263390023581309925613858e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.21854067989018450973827853792407054510e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20573856697340412957421887367218135538e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30843538021351383101589538141878424462e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.05991458689384045976214216819611949900e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.82253708752556965233757129893944884411e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.97645331663303764054986066027964294209e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.69353366461654917577775981574517182648e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59050144462227302681332505386238071973e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.85165507189649330971049854127575847359e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70711310565669331853925519429988855964e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.72047006026700174884151916064158941262e-38),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50985661940624198574968436548711898948e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81705882167596649186405364717835589894e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86537779048672498307196786015602357729e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09555188550938733096253930959407749063e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41930442687159455334801545898059105733e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.09084284266255183930305946875294557622e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.58122754063904909636061457739518406730e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.91800215912676651584368499126132687326e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.66413330532845384974993669138524203429e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65919563020196445006309683624384862816e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.61596083414169579692212575079167989319e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16321386033703806802403099255708972015e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.90892719803158002834365234646982537288e-25),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        RealType t = 1 / sqrt(x * x * x);
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.0545e-39
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[8] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.99206710301074508454959544950786401357e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.75243304700875633383991614142545185173e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.69652690455351600373808930804785330828e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.36233941060408773406522171349397343951e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.28958973553713980463808202034854958375e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.55704950313835982743029388151551925282e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.28767698270323629107775935552991333781e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.80591252844738626580182351673066365090e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.57593243741246726197476469913307836496e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.99458751269722094414105565700775283458e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.91043982880665229427553316951582511317e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.99054490423334526438490907473548839751e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.36948968143124830402744607365089118030e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13781639547150826385071482161074041168e4),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t / x;
+    }
+
+    return result;
+}
+
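+// Dispatch layer: the Holtsmark density is symmetric about zero, so the sign
+// is dropped and the integral_constant precision tag selects between the
+// 53-bit and 113-bit rational approximations above.  A call such as
+// boost::math::pdf(holtsmark_distribution<double>(), 2.0), for instance,
+// should resolve to the <int, 53> overload.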
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>& tag) {
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    return holtsmark_pdf_plus_imp_prec(abs(x), tag);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>& tag) {
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    return holtsmark_pdf_plus_imp_prec(abs(x), tag);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_pdf_imp(const holtsmark_distribution<RealType, Policy>& dist, const RealType& x) {
+    //
+    // This calculates the pdf of the Holtsmark distribution.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::pdf(holtsmark<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Holtsmark distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (i.e. 128-bit quad-floats)");
+
+    RealType u = (x - location) / scale;
+
+    result = holtsmark_pdf_imp_prec(u, tag_type()) / scale;
+
+    return result;
+}
+
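+// Complementary CDF on x >= 0.  The interval splits mirror the pdf branches;
+// the last branch integrates the x^(-5/2) density tail, so the survival
+// function falls off like 1.9947...e-1 * x^(-3/2) for large x.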
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 0.5) {
+        // Rational Approximation
+        // Maximum Relative Error: 1.3147e-17
+        BOOST_MATH_STATIC const RealType P[6] = {
+            static_cast<RealType>(5.0e-1),
+            static_cast<RealType>(-1.34752580674786639030e-1),
+            static_cast<RealType>(1.86318418252163378528e-2),
+            static_cast<RealType>(1.04499798132512381447e-2),
+            static_cast<RealType>(-1.60831910014592923855e-3),
+            static_cast<RealType>(1.38823662364438342844e-4),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.05200341554753776087e-1),
+            static_cast<RealType>(2.12663999430421346175e-1),
+            static_cast<RealType>(7.23836000984872591553e-2),
+            static_cast<RealType>(1.67941072412796299986e-2),
+            static_cast<RealType>(4.71213644318790580839e-3),
+            static_cast<RealType>(5.86825130959777535991e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 1) {
+        RealType t = x - 0.5f;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.6265e-18
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(3.60595773518728397351e-1),
+            static_cast<RealType>(5.75238626843218819756e-1),
+            static_cast<RealType>(-3.31245319943021227117e-1),
+            static_cast<RealType>(1.48132966310216368831e-1),
+            static_cast<RealType>(-2.32875122617713403365e-2),
+            static_cast<RealType>(2.08038303148835575624e-3),
+            static_cast<RealType>(6.01511310581302829460e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(2.32264360456739861886e0),
+            static_cast<RealType>(6.39715443864749851087e-1),
+            static_cast<RealType>(5.03940458163958921325e-1),
+            static_cast<RealType>(8.84780893031413729292e-2),
+            static_cast<RealType>(3.01497774031208621961e-2),
+            static_cast<RealType>(3.45886005612108195390e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.4398e-20
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(2.43657975600729535515e-1),
+            static_cast<RealType>(-6.02286263626532324632e-2),
+            static_cast<RealType>(4.68361231392743283350e-2),
+            static_cast<RealType>(-1.13497179885838883972e-3),
+            static_cast<RealType>(1.20141595689136205012e-3),
+            static_cast<RealType>(3.02402304689333413256e-4),
+            static_cast<RealType>(-1.22652173865646814676e-6),
+            static_cast<RealType>(2.29521832683440044997e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.82002427359748247121e-1),
+            static_cast<RealType>(3.96529686558825119743e-1),
+            static_cast<RealType>(1.49690294526117385174e-1),
+            static_cast<RealType>(5.15049953937764895435e-2),
+            static_cast<RealType>(1.30218216530450637564e-2),
+            static_cast<RealType>(2.53640337919037463659e-3),
+            static_cast<RealType>(3.79575042317720710311e-4),
+            static_cast<RealType>(2.94034997185982139717e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.6148e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(1.05039829654829164883e-1),
+            static_cast<RealType>(1.66621813028423002562e-2),
+            static_cast<RealType>(2.93820049104275137099e-2),
+            static_cast<RealType>(3.36850260303189378587e-3),
+            static_cast<RealType>(2.27925819398326978014e-3),
+            static_cast<RealType>(1.66394162680543987783e-4),
+            static_cast<RealType>(4.51400415642703075050e-5),
+            static_cast<RealType>(2.12164734714059446913e-7),
+            static_cast<RealType>(1.69306881760242775488e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(9.63461239051296108254e-1),
+            static_cast<RealType>(6.54183344973801096611e-1),
+            static_cast<RealType>(2.92007762594247903696e-1),
+            static_cast<RealType>(1.00918751132022401499e-1),
+            static_cast<RealType>(2.55899135910670703945e-2),
+            static_cast<RealType>(4.85740416919283630358e-3),
+            static_cast<RealType>(6.11435190489589619906e-4),
+            static_cast<RealType>(4.10953248859973756440e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.5866e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(3.05754562114095142887e-2),
+            static_cast<RealType>(3.25462617990002726083e-2),
+            static_cast<RealType>(1.78205524297204753048e-2),
+            static_cast<RealType>(5.61565369088816402420e-3),
+            static_cast<RealType>(1.05695297340067353106e-3),
+            static_cast<RealType>(9.93588579804511250576e-5),
+            static_cast<RealType>(2.94302107205379334662e-6),
+            static_cast<RealType>(1.09016076876928010898e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.51164395622515150122e0),
+            static_cast<RealType>(1.09391911233213526071e0),
+            static_cast<RealType>(4.77950346062744800732e-1),
+            static_cast<RealType>(1.34082684956852773925e-1),
+            static_cast<RealType>(2.37572579895639589816e-2),
+            static_cast<RealType>(2.41806218388337284640e-3),
+            static_cast<RealType>(1.10378140456646280084e-4),
+            static_cast<RealType>(1.31559373832822136249e-6),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.6575e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(9.47408470248235718880e-3),
+            static_cast<RealType>(4.70888722333356024081e-3),
+            static_cast<RealType>(8.66397831692913140221e-4),
+            static_cast<RealType>(7.11721056656424862090e-5),
+            static_cast<RealType>(2.56320582355149253994e-6),
+            static_cast<RealType>(3.37749186035552101702e-8),
+            static_cast<RealType>(8.32182844837952178153e-11),
+            static_cast<RealType>(-8.80541360484428526226e-14),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(6.98261117346347123707e-1),
+            static_cast<RealType>(1.97823959738695249267e-1),
+            static_cast<RealType>(2.89311735096848395080e-2),
+            static_cast<RealType>(2.30087055379997473849e-3),
+            static_cast<RealType>(9.60592522700377510007e-5),
+            static_cast<RealType>(1.84474415187428058231e-6),
+            static_cast<RealType>(1.14339998084523151203e-8),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.4164e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(3.19610991747326729867e-3),
+            static_cast<RealType>(5.11880074251341162590e-4),
+            static_cast<RealType>(2.80704092977662888563e-5),
+            static_cast<RealType>(6.31310155466346114729e-7),
+            static_cast<RealType>(5.29618446795457166842e-9),
+            static_cast<RealType>(9.20292337847562746519e-12),
+            static_cast<RealType>(-9.16761719448360345363e-15),
+            static_cast<RealType>(1.20433396121606479712e-17),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(2.56283944667056551858e-1),
+            static_cast<RealType>(2.56811818304462676948e-2),
+            static_cast<RealType>(1.26678062261253559927e-3),
+            static_cast<RealType>(3.17001344827541091252e-5),
+            static_cast<RealType>(3.68737201224811007437e-7),
+            static_cast<RealType>(1.47625352605312785910e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.2537e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.11172037056341397612e-3),
+            static_cast<RealType>(7.84545643188695076893e-5),
+            static_cast<RealType>(1.94862940242223222641e-6),
+            static_cast<RealType>(2.02704958737259525509e-8),
+            static_cast<RealType>(7.99772378955335076832e-11),
+            static_cast<RealType>(6.62544230949971310060e-14),
+            static_cast<RealType>(-3.18234118727325492149e-17),
+            static_cast<RealType>(2.03424457039308806437e-20),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.17861198759233241198e-1),
+            static_cast<RealType>(5.45962263583663240699e-3),
+            static_cast<RealType>(1.25274651876378267111e-4),
+            static_cast<RealType>(1.46857544539612002745e-6),
+            static_cast<RealType>(8.06441204620771968579e-9),
+            static_cast<RealType>(1.53682779460286464073e-11),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
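+    // x^3 overflows a double once x exceeds roughly 5.6e102; isnormal() spots
+    // this, and the fallback 1 / pow(sqrt(x), 3) computes x^(-3/2) without
+    // ever forming x^3.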
+    else {
+        RealType x_cube = x * x * x;
+        RealType t = static_cast<RealType>((boost::math::isnormal)(x_cube) ?
+            1 / sqrt(x_cube) : 1 / pow(sqrt(x), 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.2897e-18
+        BOOST_MATH_STATIC const RealType P[4] = {
+            static_cast<RealType>(1.99471140200716338970e-1),
+            static_cast<RealType>(-6.90933799347184400422e-1),
+            static_cast<RealType>(4.30385245884336871950e-1),
+            static_cast<RealType>(3.52790131116013716885e-1),
+        };
+        BOOST_MATH_STATIC const RealType Q[3] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(-5.05959751628952574534e0),
+            static_cast<RealType>(8.04408113719341786819e0),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t;
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 0.5) {
+        // Rational Approximation
+        // Maximum Relative Error: 8.6635e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.0e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.48548242430636907136192799540229598637e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31541453581608245475805834922621529866e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.16579064508490250336159593502955219069e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61598809551362112011328341554044706550e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.15119245273512554325709429759983470969e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02145196753734867721148927112307708045e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.90817224464950088663183617156145065001e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69596202760983052482358128481956242532e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.50461337222845025623869078372182437091e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.62777995800923647521692709390412901586e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.63937253747323898965514197114021890186e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.76090180430550757765787254935343576341e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.07685236907561593034104428156351640194e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27770556484351179553611274487979706736e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.99201460869149634331004096815257398515e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70139000408086498153685620963430185837e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74682544708653069148470666809094453722e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57607114117485446922700160080966856243e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01069214414741946409122492979083487977e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.19996282759031441186748256811206136921e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60933466092746543579699079418115420013e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.92780739162611243933581782562159603862e-8),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 1) {
+        RealType t = x - 0.5;
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.1235e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60595773518728397925852903878144761766e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.46999595154527091473427440379143006753e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36962313432466566724352608642383560211e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08387290167105915393692028475888846796e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.34156151832478939276011262838869269011e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15970594471853166393830585755485842021e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.47022841547527682761332752928069503835e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.01955019188793323293925482112543902560e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.03069493388735516695142799880566783261e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61367662035593735709965982000611000987e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.62800430658278408539398798888955969345e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.22300086876618079439960709120163780513e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19740977756009966244249035150363085180e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39394884078938560974435920719979860046e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.97107758486905601309707335353809421910e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.36594079604957733960211938310153276332e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.85712904264673773213248691029253356702e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.87605080555629969548037543637523346061e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.26356599628579249350545909071984757938e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.79582114368994462181480978781382155103e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.00970375323007336435151032145023199020e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.06528824060244313614177859412028348352e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.13914667697998291289987140319652513139e-7),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.7659e-38
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43657975600729535499895880792984203140e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.37090874182351552816526775008685285108e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70793783828569126853147999925198280654e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.27295555253412802819195403503721983066e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.95916890788873842705597506423512639342e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93625795791721417553345795882983866640e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.73237387099610415336810752053706403935e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08118655139419640900853055479087235138e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74920069862339840183963818219485580710e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59015304773612605296533206093582658838e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57256820413579442950151375512313072105e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.36240848333000575199740403759568680951e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53890585580518120552628221662318725825e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59245311730292556271235324976832000740e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.49800491033591771256676595185869442663e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.35827615015880595229881139361463765537e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41657125931991211322147702760511651998e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11782602975553967179829921562737846592e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79410805176258968660086532862367842847e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22872839892405613311532856773434270554e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.23742349724658114137235071924317934569e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.80350762663884259375711329227548815674e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59501693037547119094683008622867020131e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.86068186167498269806443077840917848151e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.36940342373887783231154918541990667741e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.48911186460768204167014270878839691938e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.55051094964993052272146587430780404904e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.96312716130620326771080033656930839768e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45496951385730104726429368791951742738e-10),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.9091e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05039829654829170780787685299556996311e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28948022754388615368533934448107849329e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.34139151583225691775740839359914493385e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13366377215523066657592295006960955345e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08045462837998791188853367062130086996e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.37648565386728404881404199616182064711e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14881702523183566448187346081007871684e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73169022445183613027772635992366708052e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.86434609673325793686202636939208406356e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20865083025640755296377488921536984172e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24550087063009488023243811976147518386e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78763689691843975658550702147832072016e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.53901449493513509116902285044951137217e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64133451376958243174967226929215155126e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78021916681275593923355425070000331160e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40116391931116431686557163556034777896e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.43891156389092896219387988411277617045e-15),
+        };
+        BOOST_MATH_STATIC const RealType Q[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30840297297890638941129884491157396207e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16059271948787750556465175239345182035e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.32333703228724830516425197803770832978e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.74722711058640395885914966387546141874e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57544653090705553268164186689966671940e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.65943099435809995745673109708218670077e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74158626875895095042054345316232575354e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.65978318533667031874695821156329945501e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07907034178758316909655424935083792468e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.16769901831316460137104511711073411646e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.72764558714782436683712413015421717627e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.42494185105694341746192094740530489313e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.47668761140694808076322373887857100882e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.06395948884595166425357861427667353718e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.04398743651684916010743222115099630062e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47852251142917253705233519146081069006e-10),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.2255e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[20] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05754562114095147060025732340404111260e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.29082907781747007723015304584383528212e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.15736486393536930535038719804968063752e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.47619683293773846642359668429058772885e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78777185267549567154655052281449528836e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.32280474402180284471490985942690221861e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.45430564625797085273267452885960070105e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.81643129239005795245093568930666448817e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57851748656417804512189330871167578685e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.04264676511381380381909064283066657450e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.84536783037391183433322642273799250079e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.27169201994160924743393109705813711010e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.42623512076200527099335832138825884729e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.98298083389459839517970895839114237996e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71357920034737751299594537655948527288e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.98563999354325930973228648080876368296e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36248172644168880316722905969876969074e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.61071663749398045880261823483568866904e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.95933262363502031836408613043245164787e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23007623135952181561484264810647517912e-21),
+        };
+        BOOST_MATH_STATIC const RealType Q[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17760389606658547971193065026711073898e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49565543987559264712057768584303008339e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94822569926563661124528478579051628722e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14676844425183314970062115422221981422e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.35960757354198367535169328826167556715e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04865288305482048252211468989095938024e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.51599632816346741950206107526304703067e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74065824586512487126287762563576185455e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.91819078437689679732215988465616022328e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.41675362609023565846569121735444698127e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17176431752708802291177040031150143262e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52367587943529121285938327286926798550e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59405168077254169099025950029539316125e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29448420654438993509041228047289503943e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.70091773726833073512661846603385666642e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.03909417984236210307694235586859612592e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.59098698207309055890188845050700901852e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28146456709550379493162440280752828165e-14),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.0174e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[17] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.47408470248235665279366712356669210597e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32149712567170349164953101675315481096e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.39806230477579028722350422669222849223e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19665271447867857827798702851111114658e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.06773237553503696884546088197977608676e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41294370314265386485116359052296796357e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74848600628353761723457890991084017928e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52963427970210468265870547940464851481e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.33389244528769791436454176079341120973e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.86702000100897346192018772319301428852e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04192907586200235211623448416582655030e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70804269459077260463819507381406529187e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52665761996923502719902050367236108720e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.01866635015788942430563628065687465455e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.46658865059509532456423012727042498365e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.05806999626031246519161395419216393127e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.37645700309533972676063947195650607935e-26),
+        };
+        BOOST_MATH_STATIC const RealType Q[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59608758824065179587008165265773042260e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17347162462484266250945490058846704988e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.24511137251392519285309985668265122633e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58497164094526279145784765183039854604e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40787701096334660711443654292041286786e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.34615029717812271556414485397095293077e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.17712219229282308306346195001801048971e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.24578142893420308057222282020407949529e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.23429691331344898578916434987129070432e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41486460551571344910835151948209788541e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.23569151219279213399210115101532416912e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.21438860148387356361258237451828377118e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.46770060692933726695086996017149976796e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.58079984178724940266882149462170567147e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19997796316046571607659704855966005180e-17),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.5109e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19610991747326725339429696634365932643e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74646611039453235739153286141429338461e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13331430865337412098234177873337036811e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.58947311195482646360642638791970923726e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79226752074485124923797575635082779509e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73081326043094090549807549513512116319e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05408849431691450650464797109033182773e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75716486666270246158606737499459843698e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.81075133718930099703621109350447306080e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.41318403854345256855350755520072932140e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70220987388883118699419526374266655536e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38711669183547686107032286389030018396e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31300491679098874872172866011372530771e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.99223939265527640018203019269955457925e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.18316957049006338447926554380706108087e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.47298013808154174645356607027685011183e-32),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.42561659771176310412113991024326129105e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83353398513931409985504410958429204317e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.07254121026393428163401481487563215753e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36667170168890854756291846167398225330e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54019749685699795075624204463938596069e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35321766966107368759516431698755077175e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13350720091296144188972188966204719103e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38107118390482863395863404555696613407e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59267757423034664579822257229473088511e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29549090773392058626428205171445962834e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69922128600755513676564327500993739088e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31337037977667816904491472174578334375e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.28088047429043940293455906253037445768e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.01213369826105495256520034997664473667e-22),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2707e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11172037056341396583040940446061501972e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09383362521204903801686281772843962372e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71440982391172647693486692131238237524e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.01685075759372692173396811575536866699e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36574894913423830789864836789988898151e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.59644999935503505576091023207315968623e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.95573282292603122067959656607163690356e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.10361486103428098366627536344769789255e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80946231978997457068033851007899208222e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.39341134002270945594553624959145830111e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72307967968246649714945553177468010263e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41093409238620968003297675770440189200e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70464969040825495565297719377221881609e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.25341184125872354328990441812668510029e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.54663422572657744572284839697818435372e-36),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35632539169215377884393376342532721825e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.46975491055790597767445011183622230556e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51806800870130779095309105834725930741e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.07403939022350326847926101278370197017e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66046114012817696416892197044749060854e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.16723371111678357128668916130767948114e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.22972796529973974439855811125888770710e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.91073180314665062004869985842402705599e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43753004383633382914827301174981384446e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.77313206526206002175298314351042907499e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32850553089285690900825039331456226080e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.85369976595753971532524294793778805089e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28948021485210224442871255909409155592e-25),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
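+    // Same overflow guard as the double-precision tail branch above: avoid
+    // forming x^3 directly once it would leave the normal range.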
+    else {
+        RealType x_cube = x * x * x;
+        RealType t = (boost::math::isnormal)(x_cube) ?
+            1 / sqrt(x_cube) : 1 / pow(sqrt(x), 3);
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.4677e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[7] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99471140200716338969973029967190934238e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.48481268366645066801385595379873318648e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64087860141734943856373451877569284231e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.45555576045996041260191574503331698473e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43290677381328916734673040799990923091e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.63011127597770211743774689830589568544e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.61127812511057623691896118746981066174e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.90660291309478542795359451748753358123e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60631500002415936739518466837931659008e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.88655117367497147850617559832966816275e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48350179543067311398059386524702440002e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.18873206560757944356169500452181141647e3),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t;
+    }
+
+    return result;
+}
+
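+// Reflection layer: by symmetry F(-x) = 1 - F(x), so both tails reduce to
+// holtsmark_cdf_plus_imp_prec; `complement` returns the survival function
+// directly, avoiding cancellation for large x, and the trailing else only
+// fires for NaN input.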
+ // + + BOOST_MATH_STD_USING // for ADL of std functions + constexpr auto function = "boost::math::cdf(holtsmark<%1%>&, %1%)"; + RealType result = 0; + RealType location = dist.location(); + RealType scale = dist.scale(); + + if (false == detail::check_location(function, location, &result, Policy())) + { + return result; + } + if (false == detail::check_scale(function, scale, &result, Policy())) + { + return result; + } + if (false == detail::check_x(function, x, &result, Policy())) + { + return result; + } + + typedef typename tools::promote_args::type result_type; + typedef typename policies::precision::type precision_type; + typedef boost::math::integral_constant tag_type; + + static_assert(tag_type::value, "The Holtsmark distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats"); + + RealType u = (x - location) / scale; + + result = holtsmark_cdf_imp_prec(u, complement, tag_type()); + + return result; +} + +template +BOOST_MATH_GPU_ENABLED inline RealType holtsmark_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (ilogb(p) >= -2) { + RealType t = -log2(ldexp(p, 1)); + + // Rational Approximation + // Maximum Relative Error: 5.8068e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(7.59789769759814986929e-1), + static_cast(1.27515008642985381862e0), + static_cast(4.38619247097275579086e-1), + static_cast(-1.25521537863031799276e-1), + static_cast(-2.58555599127223857177e-2), + static_cast(1.20249932437303932411e-2), + static_cast(-1.36753104188136881229e-3), + static_cast(6.57491277860092595148e-5), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(2.48696501912062288766e0), + static_cast(2.06239370128871696850e0), + static_cast(5.67577904795053902651e-1), + static_cast(-2.89022828087034733385e-2), + static_cast(-2.17207943286085236479e-2), + static_cast(3.14098307020814954876e-4), + static_cast(3.51448381406676891012e-4), + static_cast(5.71995514606568751522e-5), + }; + + result = t * tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -3) { + RealType t = -log2(ldexp(p, 2)); + + // Rational Approximation + // Maximum Relative Error: 1.0339e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(3.84521387984759064238e-1), + static_cast(4.15763727809667641126e-1), + static_cast(-1.73610240124046440578e-2), + static_cast(-3.89915764128788049837e-2), + static_cast(1.07252911248451890192e-2), + static_cast(7.62613727089795367882e-4), + static_cast(-3.11382403581073580481e-4), + static_cast(3.93093062843177374871e-5), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(6.76193897442484823754e-1), + static_cast(3.70953499602257825764e-2), + static_cast(-2.84211795745477605398e-2), + static_cast(2.66146101014551209760e-3), + static_cast(1.85436727973937413751e-3), + static_cast(2.00318687649825430725e-4), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -4) { + RealType t = -log2(ldexp(p, 3)); + + // Rational Approximation + // Maximum Relative Error: 1.4431e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(4.46943301497773314460e-1), + static_cast(-1.07267614417424412546e-2), + static_cast(-7.21097021064631831756e-2), + static_cast(2.93948745441334193469e-2), + static_cast(-7.33259305010485915480e-4), + 
static_cast(-1.38660725579083612045e-3), + static_cast(2.95410432808739478857e-4), + static_cast(-2.88688017391292485867e-5), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(-2.72809429017073648893e-2), + static_cast(-7.85526213469762960803e-2), + static_cast(2.41360900478283465241e-2), + static_cast(3.44597797125179611095e-3), + static_cast(-8.65046428689780375806e-4), + static_cast(-1.04147382037315517658e-4), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -6) { + RealType t = -log2(ldexp(p, 4)); + + // Rational Approximation + // Maximum Relative Error: 4.8871e-17 + BOOST_MATH_STATIC const RealType P[10] = { + static_cast(4.25344469980677332786e-1), + static_cast(3.42055470008289997369e-2), + static_cast(9.33607217644370441642e-2), + static_cast(4.57057092587794346086e-2), + static_cast(1.16149976708336017542e-2), + static_cast(6.40479797962035786337e-3), + static_cast(1.58526153828271386329e-3), + static_cast(3.84032908993313260466e-4), + static_cast(6.98960839033991110525e-5), + static_cast(9.66690587477825432174e-6), + }; + BOOST_MATH_STATIC const RealType Q[10] = { + static_cast(1.), + static_cast(1.60044610004497775009e-1), + static_cast(2.41675490962065446592e-1), + static_cast(1.13752642382290596388e-1), + static_cast(4.05058759031434785584e-2), + static_cast(1.59432816225295660111e-2), + static_cast(4.79286678946992027479e-3), + static_cast(1.16048151070154814260e-3), + static_cast(2.01755520912887201472e-4), + static_cast(2.82884561026909054732e-5), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -8) { + RealType t = -log2(ldexp(p, 6)); + + // Rational Approximation + // Maximum Relative Error: 4.8173e-17 + BOOST_MATH_STATIC const RealType P[9] = { + static_cast(3.68520435599726877886e-1), + static_cast(8.26682725061327242371e-1), + static_cast(6.85235826889543887309e-1), + static_cast(3.28640408399661746210e-1), + static_cast(9.04801242897407528807e-2), + static_cast(1.57470088502958130451e-2), + static_cast(1.61541023176880542598e-3), + static_cast(9.78919203915954346945e-5), + static_cast(9.71371309261213597491e-8), + }; + BOOST_MATH_STATIC const RealType Q[8] = { + static_cast(1.), + static_cast(2.29132755303753682133e0), + static_cast(1.95530118226232968288e0), + static_cast(9.55029685883545321419e-1), + static_cast(2.68254036588585643328e-1), + static_cast(4.61398419640231283164e-2), + static_cast(4.66131710581568432246e-3), + static_cast(2.94491397241310968725e-4), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -16) { + RealType t = -log2(ldexp(p, 8)); + + // Rational Approximation + // Maximum Relative Error: 6.0376e-17 + BOOST_MATH_STATIC const RealType P[10] = { + static_cast(3.48432718168951419458e-1), + static_cast(2.99680703419193973028e-1), + static_cast(1.09531896991852433149e-1), + static_cast(2.28766133215975559897e-2), + static_cast(3.09836969941710802698e-3), + static_cast(2.89346186674853481383e-4), + static_cast(1.96344583080243707169e-5), + static_cast(9.48415601271652569275e-7), + static_cast(3.08821091232356755783e-8), + static_cast(5.58003465656339818416e-10), + }; + BOOST_MATH_STATIC const RealType Q[10] = { + static_cast(1.), + static_cast(8.73938978582311007855e-1), + static_cast(3.21771888210250878162e-1), + static_cast(6.70432401844821772827e-2), + 
static_cast(9.05369648218831664411e-3), + static_cast(8.50098390828726795296e-4), + static_cast(5.73568804840571459050e-5), + static_cast(2.78374120155590875053e-6), + static_cast(9.03427646135263412003e-8), + static_cast(1.63556457120944847882e-9), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -32) { + RealType t = -log2(ldexp(p, 16)); + + // Rational Approximation + // Maximum Relative Error: 2.2804e-17 + BOOST_MATH_STATIC const RealType P[10] = { + static_cast(3.41419813138786920868e-1), + static_cast(1.30219412019722274099e-1), + static_cast(2.36047671342109636195e-2), + static_cast(2.67913051721210953893e-3), + static_cast(2.10896260337301129968e-4), + static_cast(1.19804595761611765179e-5), + static_cast(4.91470756460287578143e-7), + static_cast(1.38299844947707591018e-8), + static_cast(2.25766283556816829070e-10), + static_cast(-8.46510608386806647654e-18), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(3.81461950831351846380e-1), + static_cast(6.91390438866520696447e-2), + static_cast(7.84798596829449138229e-3), + static_cast(6.17735117400536913546e-4), + static_cast(3.50937328177439258136e-5), + static_cast(1.43958654321452532854e-6), + static_cast(4.05109749922716264456e-8), + static_cast(6.61306247924109415113e-10), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -64) { + RealType t = -log2(ldexp(p, 32)); + + // Rational Approximation + // Maximum Relative Error: 4.8545e-17 + BOOST_MATH_STATIC const RealType P[9] = { + static_cast(3.41392032051575965049e-1), + static_cast(1.53372256183388434238e-1), + static_cast(3.33822240038718319714e-2), + static_cast(4.66328786929735228532e-3), + static_cast(4.67981207864367711082e-4), + static_cast(3.48119463063280710691e-5), + static_cast(2.17755850282052679342e-6), + static_cast(7.40424342670289242177e-8), + static_cast(4.61294046336533026640e-9), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(4.49255524669251621744e-1), + static_cast(9.77826688966262423974e-2), + static_cast(1.36596271675764346980e-2), + static_cast(1.37080296105355418281e-3), + static_cast(1.01970588303201339768e-4), + static_cast(6.37846903580539445994e-6), + static_cast(2.16883897125962281968e-7), + static_cast(1.35121503608967367232e-8), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else { + const BOOST_MATH_STATIC_LOCAL_VARIABLE RealType c = ldexp(cbrt(constants::pi()), 1); + + RealType p_square = p * p; + + if ((boost::math::isnormal)(p_square)) { + result = 1 / (cbrt(p_square) * c); + } + else if (p > 0) { + result = 1 / (cbrt(p) * cbrt(p) * c); + } + else { + result = boost::math::numeric_limits::infinity(); + } + } + + return result; +} + + +template +BOOST_MATH_GPU_ENABLED inline RealType holtsmark_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (ilogb(p) >= -2) { + RealType u = -log2(ldexp(p, 1)); + + if (u < 0.5) { + // Rational Approximation + // Maximum Relative Error: 1.7987e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.59789769759815031687162026655576575384e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.23247138049619855169890925442523844619e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
5.35351935489348780511227763760731136136e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.17321534695821967609074567968260505604e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30930523792327030433989902919481147250e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.47676800034255152477549544991291837378e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.09952071024064609787697026812259269093e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.65479872964217159571026674930672527880e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.30204907832301876030269224513949605725e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.61038349134944320766567917361933431224e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.17242905696479357297850061918336600969e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43640101589433162893041733511239841220e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.39406616773257816628641556843884616119e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54871597065387376666252643921309051097e-7), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.06310038178166385607814371094968073940e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06144046990424238286303107360481469219e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17860081295611631017119482265353540470e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.26319639748358310901277622665331115333e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.25962127567362715217159291513550804588e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65543974081934423010588955830131357921e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.80331848633772107482330422252085368575e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.97426948050874772305317056836660558275e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.10722999873793200671617106731723252507e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.68871255379198546500699434161302033826e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70190278641952708999014435335172772138e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.11562497711461468804693130702653542297e-7), + }; + // LCOV_EXCL_STOP + result = u * tools::evaluate_polynomial(P, u) / (tools::evaluate_polynomial(Q, u) * cbrt(p * p)); + } + else { + RealType t = u - static_cast (0.5); + + // Rational Approximation + // Maximum Relative Error: 2.5554e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.63490994331899195346399558699533994243e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.68682839419340144322747963938810505658e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.63089084712442063245295709191126453412e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24910510426787025593146475670961782647e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.14005632199839351091767181535761567981e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.88144015238275997284082820907124267240e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.12015895125039876623372795832970536355e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.96386756665254981286292821446749025989e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.82855208595003635135641502084317667629e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.18007513930934295792217002090233670917e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.82563310387467580262182864644541746616e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52830681121195099547078704713089681353e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
3.91383571211375811878311159248551586411e-8), + }; + BOOST_MATH_STATIC const RealType Q[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96820655322136936855997114940653763917e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30209571878469737819039455443404070107e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.61235660141139249931521613001554108034e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.31683133997030095798635713869616211197e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.20681979279848555447978496580849290723e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.08958899028812330281115719259773001136e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.02478613175545210977059079339657545008e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.68653479132148912896487809682760117627e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35166554499214836086438565154832646441e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95409975934011596023165394669416595582e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.84312112139729518216217161835365265801e-7), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + } + else if (ilogb(p) >= -3) { + RealType u = -log2(ldexp(p, 2)); + + if (u < 0.5) { + // Rational Approximation + // Maximum Relative Error: 1.0297e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.84521387984759060262188972210005114936e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70837834325236202821328032137877091515e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53856963029219911450181095566096563059e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.97659091653089105048621336944687224192e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.77726241585387617566937892474685179582e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21657224955483589784473724186837316423e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76357400631206366078287330192525531850e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.45967265853745968166172649261385754061e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.08367654892620484522749804048317330020e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41224530727710207304898458924763411052e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.02908228738160003274584644834000176496e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05702214080592377840761032481067834813e-7), + }; + BOOST_MATH_STATIC const RealType Q[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33954869248363301881659953529609341564e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.73738626674455393272550888585363920917e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.90708494363306682523722238824373341707e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.49559648492983033200126224112060119905e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.07561158260652000950392950266037061167e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30349651195547682860585068738648645100e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.21766408404123861757376277367204136764e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.22181499366766592894880124261171657846e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.74488053046587079829684775540618210211e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90504597668186854963746384968119788469e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45195198322028676384075318222338781298e-7), + }; 
+ // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, u) / (tools::evaluate_polynomial(Q, u) * cbrt(p * p)); + } + else { + RealType t = u - static_cast (0.5); + + // Rational Approximation + // Maximum Relative Error: 1.3688e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34418795581931891732555950599385666106e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.13006013029934051875748102515422669897e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.27990072710518465265454549585803147529e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.82530244963278920355650323928131927272e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05335741422175616606162502617378682462e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71242678756797136217651369710748524650e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.65147398836785709305701073315614307906e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23912765853731378067295654886575185240e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77861910171412622761254991979036167882e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.11971510714149983297022108523700437739e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23649928279010039670034778778065846828e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.99636080473697209793683863161785312159e-8), + }; + BOOST_MATH_STATIC const RealType Q[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.95056572065373808001002483348789719155e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.55702988004729812458415992666809422570e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.07586989542594910084052301521098115194e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96831670560124470215505714403486118412e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.86445076378084412691927796983792892534e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.75566285003039738258189045863064261980e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.18557444175572723760508226182075127685e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66667716357950609103712975111660496416e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70999480357934082364999779023268059131e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.14604868719110256415222454908306045416e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.32724040071094913191419223901752642417e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + } + else if (ilogb(p) >= -4) { + RealType t = -log2(ldexp(p, 3)); + + // Rational Approximation + // Maximum Relative Error: 6.6020e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46943301497773318715008398224877079279e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.85403413700924949902626248891615772650e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.02791895890363892816315784780533893399e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.89147412486638444082129846251261616763e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.93382251168424191872267997181870008850e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.68332196426082871660060467570049113632e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.88720436260994811649162949644253306037e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.34099304204778307050211441936900839075e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.42970601149275611131932131801993030928e-4), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.19329425598839605828710629592687495198e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.48826007216547106568423189194739111033e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.47132934846160946190230821709692067279e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34123780321108493820637601375183345528e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.64549285026064221742294542922996905241e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72723306533295983872420985773212608299e-9), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.23756826160440280076231428938184359865e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.46557011055563840763437682311082689407e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18907861669025579159409035585375166964e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.09998981512549500250715800529896557509e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.09496663758959409482213456915225652712e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.37086325651334206453116588474211557676e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65325780110454655811120026458133145750e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.93435549562125602056160657604473721758e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34967558308250784125219085040752451132e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.73883529653464036447550624641291181317e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.88600727347267778330635397957540267359e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.26681383000234695948685993798733295748e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19871610873353691152255428262732390602e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42468017918888155246438948321084323623e-9), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -5) { + RealType u = -log2(ldexp(p, 4)); + + if (u < 0.5) { + // Rational Approximation + // Maximum Relative Error: 5.0596e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.25344469980677353573160570139298422046e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.41915371584999983192100443156935649063e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02829239548689190780023994008688591230e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29283473326959885625548350158197923999e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.01078477165670046284950196047161898687e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.02714892887893367912743194877742997622e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.43133417775367444366548711083157149060e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34782994090554432391320506638030058071e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06742736859237185836735105245477248882e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.55982601406660341132288721616681417444e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.57770758189194396236862269776507019313e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.29311341249565125992213260043135188072e-8), + }; + BOOST_MATH_STATIC const RealType Q[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.54021943144355190773797361537886598583e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30965787836836308380896385568728211303e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
3.19314242976592846926644622802257778872e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.84123785238634690769817401191138848504e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.75779029464908805680899310810660326192e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15078294915445673781718097749944059134e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84667183003626452412083824490324913477e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.59521438712225874821007396323337016693e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.90446539427779905568600432145715126083e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.21425779911599424040614866482614099753e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.00972806247654369646317764344373036462e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, u) / (tools::evaluate_polynomial(Q, u) * cbrt(p * p)); + } + else { + RealType t = u - static_cast (0.5); + + // Rational Approximation + // Maximum Relative Error: 8.3743e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08071367192424306005939751362206079160e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.94625900993512461462097316785202943274e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55970241156822104458842450713854737857e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.07663066299810473476390199553510422731e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.89859986209620592557993828310690990189e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04002735956724252558290154433164340078e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.28754717941144647796091692241880059406e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19307116062867039608045413276099792797e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.02377178609994923303160815309590928289e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.71739619655097982325716241977619135216e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70229045058419872036870274360537396648e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.90495731447121207951661931979310025968e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23210708203609461650368387780135568863e-8), + }; + BOOST_MATH_STATIC const RealType Q[11] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.93402256203255215539822867473993726421e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42452702043886045884356307934634512995e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.16981055684612802160174937997247813645e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39560623514414816165791968511612762553e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.26014275897567952035148355055139912545e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.42163967753843746501638925686714935099e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.63605648300801696460942201096159808446e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.55933967787268788177266789383155699064e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.41526208021076709058374666903111908743e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.08505866202670144225100385141263360218e-6), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + } + else if (ilogb(p) >= -6) { + RealType t = -log2(ldexp(p, 5)); + + // Rational Approximation + // Maximum Relative Error: 2.4734e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[16] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
3.92042979500197776619414802317216082414e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.94742044285563829335663810275331541585e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14525306632578654372860377652983462776e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88893010132758460781753381176593178775e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08491462791290535107958214106528611951e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61374431854187722720094162894017991926e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11641062509116613779440753514902522337e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.12474548036763970495563846370119556004e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.48140831258790372410036499310440980121e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.26913338169355215445128368312197650848e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63109797282729701768942543985418804075e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.55296802973076575732233624155433324402e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72108609713971908723724065216410393928e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.93328436272999507339897246655916666269e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72119240610740992234979508242967886200e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17836139198065889244530078295061548097e-10), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78065342260594920160228973261455037923e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.08575070304822733863613657779515344137e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81185785915044621118680763035984134530e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87597191269586886460326897968559867853e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07903258768761230286548634868645339678e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.88395769450457864233486684232536503140e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05678227243099671420442217017131559055e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.24803207742284923122212652186826674987e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06094715338829793088081672723947647238e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.96454433858093590192363331553516923090e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94509901530299070041475386866323617753e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49001710126540196485963921184736711193e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58899179756014192338509671769986887613e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.06916561094749601736592488829778059190e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -8) { + RealType t = -log2(ldexp(p, 6)); + + // Rational Approximation + // Maximum Relative Error: 1.1570e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.68520435599726860132888599110871216319e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.01076105507184082206031922185510102322e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39912455237662038937400667644545834191e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51088991221663244634723139723207272560e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26465949648856746869050310379379898086e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.37079746226805258449355819952819997723e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
4.49372033421420312720741838903118544951e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95729572745049276972587492142384353131e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.92794840197452838799536047152725573779e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96897979363475104635129765703613472468e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44138843334474914059035559588791041371e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.78076328055619970057667292651627051391e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04182093251998194244585085400876144351e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04392999917657413659748817212746660436e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.76006125565969084470924344826977844710e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21045181507045010640119572995692565368e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61400097324698003962179537436043636306e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.88084230973635340409728710734906398080e-11), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49319798750825059930589954921919984293e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.90218243410186000622818205955425584848e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.25384789213915993855434876209137054104e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.58563858782064482133038568901836564329e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39112608961600614189971858070197609546e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29192895265168981204927382938872469754e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.66418375973954918346810939649929797237e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01606040038159207768769492693779323748e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.75837675697421536953171865636865644576e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30258315910281295093103384193132807400e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.28333635097670841003561009290200071343e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.04871369296490431325621140782944603554e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05077352164673794093561693258318905067e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.28508157403208548483052311164947568580e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22527248376737724147359908626095469985e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.75479484339716254784610505187249810386e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.39990051830081888581639577552526319577e-11), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -16) { + RealType t = -log2(ldexp(p, 8)); + + // Rational Approximation + // Maximum Relative Error: 1.1362e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[22] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.48432718168951398420402661878962745094e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.55946442453078865766668586202885528338e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.54912640113904816247923987542554486059e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.60852745978561293262851287627328856197e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93256608166097432329211369307994852513e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.94707001299612588571704157159595918562e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40368387009950846525432054396214443833e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
2.62326983889228773089492130483459202197e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97166112600628615762158757484340724056e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.95053681446806610424931810174198926457e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13613767164027076487881255767029235747e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46627172639536503825606138995804926378e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.26813757095977946534946955553296696736e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01393212063713249666862633388902006492e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04428602119155661411061942866480445477e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.52977051350929618206095556763031195967e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63092013964238065197415324341392517794e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.77457116423818347179318334884304764609e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10372468210274291890669895933038762772e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85517152798650696598776156882211719502e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01916800572423194619358228507804954863e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.72241483171311778625855302356391965266e-26), + }; + BOOST_MATH_STATIC const RealType Q[21] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.18341916009800042837726003154518652168e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19215655980509256344434487727207541208e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34380326549827252189214516628038733750e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.65069135930665131327262366757787760402e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74132905027750048531814627726862962404e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.11184893124573373947875834716323223477e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.68456853089572312034718359282699132364e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16340806625223749486884390838046244494e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45007766006724826837429360471785418874e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50443251593190111677537955057976277305e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30829464745241179175728900376502542995e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.57256894336319418553622695416919409120e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.89973763917908403951538315949652981312e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05834675579622824896206540981508286215e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32721343511724613011656816221169980981e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.77569292346900432492044041866264215291e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39903737668944675386972393000746368518e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.23200083621376582032771041306045737695e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.43557805626692790539354751731913075096e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.91326410956582998375100191562832969140e-20), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -32) { + RealType t = -log2(ldexp(p, 16)); + + // Rational Approximation + // Maximum Relative Error: 9.1729e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41419813138786928653984591611599949126e-1), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94225020281693988785012368481961427155e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28967134188573605597955859185818311256e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.16617725083935565014535265818666424029e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13379610773944032381149443514208866162e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06508483032198116332154635763926628153e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.89315471210589177037346413966039863126e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72993906450633221200844495419180873066e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32883391567312244751716481903540505335e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.50998889887280885500990101116973130081e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.23247766687180294767338042555173653249e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.12326475887709255500757383109178584638e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11688319088825228685832870139320733695e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95381542569360703428852622701723193645e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.70730387484749668293167350494151199659e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84243405919322052861165273432136993833e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40860917180131228318146854666419586211e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.85122374200561402546731933480737679849e-30), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.79744248200459077556218062241428072826e-32), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.68930884381361438749954611436694811868e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54944129151720429074748655153760118465e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68493670923968273171437877298940102712e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.32109946297461941811102221103314572340e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.11983030120265263999033828442555862122e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31204888358097171713697195034681853057e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38548604936907265274059071726622071821e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.82158855359673890472124017801768455208e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78564556026252472894386810079914912632e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.46854501887011863360558947087254908412e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67236380279070121978196383998000020645e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.20076045812548485396837897240357026254e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15814123143437217877762088763846289858e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67177999717442465582949551415385496304e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71135001552136641449927514544850663366e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.98449056954034104266783180068258117013e-22), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -64) { + RealType t = -log2(ldexp(p, 32)); + + // Rational Approximation + // Maximum Relative Error: 1.8330e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41392032051575981622151194498090952488e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.32651097995974052731414709779952524875e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51927763729719814565225981452897995722e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.11148082477882981299945196621348531180e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80457559655975695558885644380771202301e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96223001525552834934139567532649816367e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10625449265784963560596299595289620029e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.14785887121654524328854820350425279893e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00832358736396150660417651391240544392e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.63128732906298604011217701767305935851e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86148432181465165445355560568442172406e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44088921565424320298916604159745842835e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.95220124898384051195673049864765987092e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50531060529388128674128631193212903032e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06119051130826148039530805693452156757e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19849873960405145967462029876325494393e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66833600176986734600260382043861669021e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07829060832934383885234817363480653925e-26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21485411177823993142696645934560017341e-40), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.88559444380290379529260819350179144435e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.37942717465159991856146428659881557553e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.11409915376157429952160202733757574026e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.21511733003564236929107862750700281202e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74773232555012468159223116269289241483e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.24042271031862389840796415749527818562e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50790246845873571117791557191071320982e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.88274886666078071130557536971927872847e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94242592538917360235050248151146832636e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45262967284548223426004177385213311949e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30081806380053435857465845326686775489e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62226426496757450797456131921060042081e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40933140159573381494354127717542598424e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.03760580312376891985077265621432029857e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.43980683769941233230954109646012150124e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.88686274782816858372719510890126716148e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07336140055510452905474533727353308321e-25), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else if (ilogb(p) >= -128) { + RealType t = -log2(ldexp(p, 64)); + + // Rational Approximation + // Maximum Relative Error: 5.9085e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41392031627647840832213878541731833340e-1), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48256908849985263191468999842405689327e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.16515822909144946601084169745484248278e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.42246334265547596187501472291026180697e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.54145961608971551335283437288203286104e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.64840354062369555376354747633807898689e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.38246669464526050793398379055335943951e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29684566081664150074215568847731661446e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.98456331768093420851844051941851740455e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36738267296531031235518935656891979319e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.08128287278026286279504717089979753319e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.38334581618709868951669630969696873534e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.08478537820365448038773095902465198679e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30768169494950935152733510713679558562e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49254243621461466892836128222648688091e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.17026357413798368802986708112771803774e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.05630817682870951728748696694117980745e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.13881361534205323565985756195674181203e-50), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34271731953273239599863811873205236246e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27133013035186849060586077266046297964e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29542078693828543540010668640353491847e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33027698228265344545932885863767276804e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06868444562964057780556916100143215394e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.97868278672593071061800234869603536243e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79869926850283188735312536038469293739e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75298857713475428365153491580710497759e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.93449891515741631851202042430818496480e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36715626731277089013724968542144140938e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.98125789528264426869121548546848968670e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78234546049400950521459021508632294206e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83044000387150792643468853129175805308e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.30111486296552039388613073915170671881e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.28628462422858134962149154420358876352e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48108558735886480279744474396456699335e-21), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p)); + } + else { + const BOOST_MATH_STATIC_LOCAL_VARIABLE RealType c = ldexp(cbrt(constants::pi()), 1); + + RealType p_square = p * p; + + if ((boost::math::isnormal)(p_square)) { + result = 1 / (cbrt(p_square) * c); + } + else if (p > 0) { + result = 1 / (cbrt(p) * cbrt(p) * c); + } + else { + result = boost::math::numeric_limits::infinity(); + } + } + + return result; +} + +template +BOOST_MATH_GPU_ENABLED inline RealType 
holtsmark_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 53>& tag)
+{
+    if (p > 0.5) {
+        return !complement ? holtsmark_quantile_upper_imp_prec(1 - p, tag) : -holtsmark_quantile_upper_imp_prec(1 - p, tag);
+    }
+
+    return complement ? holtsmark_quantile_upper_imp_prec(p, tag) : -holtsmark_quantile_upper_imp_prec(p, tag);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 113>& tag)
+{
+    if (p > 0.5) {
+        return !complement ? holtsmark_quantile_upper_imp_prec(1 - p, tag) : -holtsmark_quantile_upper_imp_prec(1 - p, tag);
+    }
+
+    return complement ? holtsmark_quantile_upper_imp_prec(p, tag) : -holtsmark_quantile_upper_imp_prec(p, tag);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_quantile_imp(const holtsmark_distribution<RealType, Policy>& dist, const RealType& p, bool complement)
+{
+    // This routine implements the quantile for the Holtsmark distribution;
+    // the value p may be the probability, or its complement if complement=true.
+
+    constexpr auto function = "boost::math::quantile(holtsmark<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_probability(function, p, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Holtsmark distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats).");
+
+    result = location + scale * holtsmark_quantile_imp_prec(p, complement, tag_type());
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_entropy_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(2.06944850513462440032);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_entropy_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.0694485051346244003155800384542166381);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType holtsmark_entropy_imp(const holtsmark_distribution<RealType, Policy>& dist)
+{
+    // This implements the entropy for the Holtsmark distribution.
+
+    constexpr auto function = "boost::math::entropy(holtsmark<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Holtsmark distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats).");
+
+    result = holtsmark_entropy_imp_prec<RealType>(tag_type()) + log(scale);
+
+    return result;
+}
+
+} // detail
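// --- Illustrative aside (reviewer note, not part of the patch) --------------
// The detail functions above select the 53-bit or 113-bit rational
// approximation at compile time via an integral_constant tag computed from the
// policy's precision. A minimal standalone analogue of that dispatch pattern;
// the names eval/eval_impl are hypothetical and stand in for the
// holtsmark_*_imp_prec overloads:

#include <limits>
#include <type_traits>

template <typename Real>
Real eval_impl(Real x, std::integral_constant<int, 53>)  { return x; } // would use the double-precision tables
template <typename Real>
Real eval_impl(Real x, std::integral_constant<int, 113>) { return x; } // would use the quad-precision tables

template <typename Real>
Real eval(Real x)
{
    static_assert(std::numeric_limits<Real>::digits <= 113, "only 53- and 113-bit mantissas are covered");
    using tag = std::integral_constant<int, (std::numeric_limits<Real>::digits <= 53) ? 53 : 113>;
    return eval_impl(x, tag{}); // overload resolution picks the table set at compile time
}
// -----------------------------------------------------------------------------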
+template <typename RealType = double, typename Policy = policies::policy<> >
+class holtsmark_distribution
+{
+    public:
+    typedef RealType value_type;
+    typedef Policy policy_type;
+
+    BOOST_MATH_GPU_ENABLED holtsmark_distribution(RealType l_location = 0, RealType l_scale = 1)
+        : mu(l_location), c(l_scale)
+    {
+        constexpr auto function = "boost::math::holtsmark_distribution<%1%>::holtsmark_distribution";
+        RealType result = 0;
+        detail::check_location(function, l_location, &result, Policy());
+        detail::check_scale(function, l_scale, &result, Policy());
+    } // holtsmark_distribution
+
+    BOOST_MATH_GPU_ENABLED RealType location()const
+    {
+        return mu;
+    }
+    BOOST_MATH_GPU_ENABLED RealType scale()const
+    {
+        return c;
+    }
+
+    private:
+    RealType mu; // The location parameter.
+    RealType c;  // The scale parameter.
+};
+
+typedef holtsmark_distribution<double> holtsmark;
+
+#ifdef __cpp_deduction_guides
+template <class RealType>
+holtsmark_distribution(RealType) -> holtsmark_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+template <class RealType>
+holtsmark_distribution(RealType, RealType) -> holtsmark_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+#endif
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const holtsmark_distribution<RealType, Policy>&)
+{ // Range of permissible values for random variable x.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const holtsmark_distribution<RealType, Policy>&)
+{ // Range of supported values for random variable x.
+    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
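// --- Illustrative aside (reviewer note, not part of the patch) --------------
// With the deduction guides and the convenience typedef above, all three
// declarations below name holtsmark_distribution<double>. A sketch, assuming
// the new header lands at boost/math/distributions/holtsmark.hpp:

#include <boost/math/distributions/holtsmark.hpp>

void deduction_demo()
{
    boost::math::holtsmark_distribution<double> a(0.0, 2.0); // explicit template argument
    boost::math::holtsmark_distribution b(0.0, 2.0);         // CTAD via the guides above
    boost::math::holtsmark c(0.0, 2.0);                      // the typedef above
    (void)a; (void)b; (void)c;
}
// -----------------------------------------------------------------------------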
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const holtsmark_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::holtsmark_pdf_imp(dist, x);
+} // pdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const holtsmark_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::holtsmark_cdf_imp(dist, x, false);
+} // cdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const holtsmark_distribution<RealType, Policy>& dist, const RealType& p)
+{
+    return detail::holtsmark_quantile_imp(dist, p, false);
+} // quantile
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<holtsmark_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::holtsmark_cdf_imp(c.dist, c.param, true);
+} // cdf complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<holtsmark_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::holtsmark_quantile_imp(c.dist, c.param, true);
+} // quantile complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mean(const holtsmark_distribution<RealType, Policy>& dist)
+{
+    return dist.location();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType variance(const holtsmark_distribution<RealType, Policy>& /*dist*/)
+{
+    return boost::math::numeric_limits<RealType>::infinity();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mode(const holtsmark_distribution<RealType, Policy>& dist)
+{
+    return dist.location();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType median(const holtsmark_distribution<RealType, Policy>& dist)
+{
+    return dist.location();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const holtsmark_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no skewness:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Holtsmark Distribution has no skewness");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::skewness(holtsmark<%1%>&)",
+        "The Holtsmark distribution does not have a skewness: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy()); // infinity?
+}
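// --- Illustrative aside (reviewer note, not part of the patch) --------------
// The complemented overloads above let callers take upper-tail probabilities
// directly, avoiding the cancellation that 1 - cdf(dist, x) suffers when
// cdf(dist, x) is close to 1. A sketch of the intended call pattern:

#include <boost/math/distributions/holtsmark.hpp>

double upper_tail_demo(double x)
{
    boost::math::holtsmark dist(0.0, 1.0);
    double tail = cdf(complement(dist, x));  // P(X > x), evaluated without forming 1 - cdf
    return quantile(complement(dist, tail)); // round-trips to approximately x
}
// -----------------------------------------------------------------------------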
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const holtsmark_distribution<RealType, Policy>& /*dist*/)
+{
+   // There is no kurtosis:
+   typedef typename Policy::assert_undefined_type assert_type;
+   static_assert(assert_type::value == 0, "The Holtsmark Distribution has no kurtosis");
+
+   return policies::raise_domain_error<RealType>(
+      "boost::math::kurtosis(holtsmark<%1%>&)",
+      "The Holtsmark distribution does not have a kurtosis: "
+      "the only possible return value is %1%.",
+      boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const holtsmark_distribution<RealType, Policy>& /*dist*/)
+{
+   // There is no kurtosis excess:
+   typedef typename Policy::assert_undefined_type assert_type;
+   static_assert(assert_type::value == 0, "The Holtsmark Distribution has no kurtosis excess");
+
+   return policies::raise_domain_error<RealType>(
+      "boost::math::kurtosis_excess(holtsmark<%1%>&)",
+      "The Holtsmark distribution does not have a kurtosis excess: "
+      "the only possible return value is %1%.",
+      boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const holtsmark_distribution<RealType, Policy>& dist)
+{
+   return detail::holtsmark_entropy_imp(dist);
+}
+
+}} // namespaces
+
+
+#endif // BOOST_STATS_HOLTSMARK_HPP
diff --git a/include/boost/math/distributions/inverse_chi_squared.hpp b/include/boost/math/distributions/inverse_chi_squared.hpp
index 19dd0371e8..1a3c680d23 100644
--- a/include/boost/math/distributions/inverse_chi_squared.hpp
+++ b/include/boost/math/distributions/inverse_chi_squared.hpp
@@ -1,6 +1,6 @@
 // Copyright John Maddock 2010.
 // Copyright Paul A. Bristow 2010.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -9,6 +9,8 @@
 #ifndef BOOST_MATH_DISTRIBUTIONS_INVERSE_CHI_SQUARED_HPP
 #define BOOST_MATH_DISTRIBUTIONS_INVERSE_CHI_SQUARED_HPP
 
+#include
+#include
 #include
 #include // for incomplete beta.
 #include // for complements.
@@ -24,14 +26,12 @@
 // Weisstein, Eric W. "Inverse Chi-Squared Distribution." From MathWorld--A Wolfram Web Resource.
 // http://mathworld.wolfram.com/InverseChi-SquaredDistribution.html
 
-#include
-
 namespace boost{ namespace math{
 
 namespace detail
 {
   template <class RealType, class Policy>
-  inline bool check_inverse_chi_squared( // Check both distribution parameters.
+  BOOST_MATH_GPU_ENABLED inline bool check_inverse_chi_squared( // Check both distribution parameters.
       const char* function,
       RealType degrees_of_freedom, // degrees_of_freedom (aka nu).
       RealType scale, // scale (aka sigma^2)
@@ -51,7 +51,7 @@ class inverse_chi_squared_distribution
    typedef RealType value_type;
    typedef Policy policy_type;
 
-   inverse_chi_squared_distribution(RealType df, RealType l_scale) : m_df(df), m_scale (l_scale)
+   BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df, RealType l_scale) : m_df(df), m_scale (l_scale)
    {
      RealType result;
      detail::check_df(
@@ -62,7 +62,7 @@ class inverse_chi_squared_distribution
        m_scale, &result, Policy());
    } // inverse_chi_squared_distribution constructor
 
-   inverse_chi_squared_distribution(RealType df = 1) : m_df(df)
+   BOOST_MATH_GPU_ENABLED inverse_chi_squared_distribution(RealType df = 1) : m_df(df)
    {
      RealType result;
      m_scale = 1 / m_df ; // Default scale = 1 / degrees of freedom (Wikipedia definition 1).
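
[Editor's note: the hunks above and below all follow one mechanical recipe: annotate member and free functions with BOOST_MATH_GPU_ENABLED, turn `static const char*` error-message names into `constexpr auto`, and swap `std::` facilities (pair, numeric_limits, uintmax_t) for their `boost::math::` portable counterparts. On a host build the annotation compiles away, so the public API is unchanged. A minimal smoke-test sketch, assuming the usual include paths; this example is not part of the patch:

#include <boost/math/distributions/holtsmark.hpp>
#include <boost/math/distributions/inverse_chi_squared.hpp>
#include <iostream>

int main()
{
    boost::math::holtsmark h(0.0, 1.0);         // location, scale
    boost::math::inverse_chi_squared ics(10.0); // degrees of freedom, default scale 1/df

    std::cout << pdf(h, 0.5) << '\n'                // Holtsmark density at x = 0.5
              << quantile(ics, 0.95) << '\n'        // 95% quantile
              << cdf(complement(ics, 2.0)) << '\n'; // upper tail P(X > 2)
}

The free functions are found by argument-dependent lookup, exactly as before the patch.]
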
@@ -71,11 +71,11 @@ class inverse_chi_squared_distribution m_df, &result, Policy()); } // inverse_chi_squared_distribution - RealType degrees_of_freedom()const + BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const { return m_df; // aka nu } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { return m_scale; // aka xi } @@ -105,28 +105,28 @@ inverse_chi_squared_distribution(RealType,RealType)->inverse_chi_squared_distrib #endif template -inline const std::pair range(const inverse_chi_squared_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const inverse_chi_squared_distribution& /*dist*/) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(static_cast(0), max_value()); // 0 to + infinity. + return boost::math::pair(static_cast(0), max_value()); // 0 to + infinity. } template -inline const std::pair support(const inverse_chi_squared_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const inverse_chi_squared_distribution& /*dist*/) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - return std::pair(static_cast(0), tools::max_value()); // 0 to + infinity. + return boost::math::pair(static_cast(0), tools::max_value()); // 0 to + infinity. } template -RealType pdf(const inverse_chi_squared_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED RealType pdf(const inverse_chi_squared_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions. RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); RealType error_result; - static const char* function = "boost::math::pdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; if(false == detail::check_inverse_chi_squared (function, df, scale, &error_result, Policy()) @@ -159,9 +159,9 @@ RealType pdf(const inverse_chi_squared_distribution& dist, con } // pdf template -inline RealType cdf(const inverse_chi_squared_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const inverse_chi_squared_distribution& dist, const RealType& x) { - static const char* function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); RealType error_result; @@ -188,13 +188,13 @@ inline RealType cdf(const inverse_chi_squared_distribution& di } // cdf template -inline RealType quantile(const inverse_chi_squared_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const inverse_chi_squared_distribution& dist, const RealType& p) { using boost::math::gamma_q_inv; RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); - static const char* function = "boost::math::quantile(const inverse_chi_squared_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const inverse_chi_squared_distribution<%1%>&, %1%)"; // Error check: RealType error_result; if(false == detail::check_df( @@ -220,13 +220,13 @@ inline RealType quantile(const inverse_chi_squared_distribution -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { 
using boost::math::gamma_q_inv; RealType const& df = c.dist.degrees_of_freedom(); RealType const& scale = c.dist.scale(); RealType const& x = c.param; - static const char* function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const inverse_chi_squared_distribution<%1%>&, %1%)"; // Error check: RealType error_result; if(false == detail::check_df( @@ -251,14 +251,14 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { using boost::math::gamma_q_inv; RealType const& df = c.dist.degrees_of_freedom(); RealType const& scale = c.dist.scale(); RealType const& q = c.param; - static const char* function = "boost::math::quantile(const inverse_chi_squared_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const inverse_chi_squared_distribution<%1%>&, %1%)"; // Error check: RealType error_result; if(false == detail::check_df(function, df, &error_result, Policy())) @@ -280,12 +280,12 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const inverse_chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const inverse_chi_squared_distribution& dist) { // Mean of inverse Chi-Squared distribution. RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); - static const char* function = "boost::math::mean(const inverse_chi_squared_distribution<%1%>&)"; + constexpr auto function = "boost::math::mean(const inverse_chi_squared_distribution<%1%>&)"; if(df <= 2) return policies::raise_domain_error( function, @@ -295,11 +295,11 @@ inline RealType mean(const inverse_chi_squared_distribution& d } // mean template -inline RealType variance(const inverse_chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType variance(const inverse_chi_squared_distribution& dist) { // Variance of inverse Chi-Squared distribution. RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); - static const char* function = "boost::math::variance(const inverse_chi_squared_distribution<%1%>&)"; + constexpr auto function = "boost::math::variance(const inverse_chi_squared_distribution<%1%>&)"; if(df <= 4) { return policies::raise_domain_error( @@ -311,14 +311,14 @@ inline RealType variance(const inverse_chi_squared_distribution -inline RealType mode(const inverse_chi_squared_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const inverse_chi_squared_distribution& dist) { // mode is not defined in Mathematica. // See Discussion section http://en.wikipedia.org/wiki/Talk:Scaled-inverse-chi-square_distribution // for origin of the formula used below. RealType df = dist.degrees_of_freedom(); RealType scale = dist.scale(); - static const char* function = "boost::math::mode(const inverse_chi_squared_distribution<%1%>&)"; + constexpr auto function = "boost::math::mode(const inverse_chi_squared_distribution<%1%>&)"; if(df < 0) return policies::raise_domain_error( function, @@ -341,11 +341,11 @@ inline RealType mode(const inverse_chi_squared_distribution& d // Now implemented via quantile(half) in derived accessors. 
 template <class RealType, class Policy>
-inline RealType skewness(const inverse_chi_squared_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const inverse_chi_squared_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING // For ADL
    RealType df = dist.degrees_of_freedom();
-   static const char* function = "boost::math::skewness(const inverse_chi_squared_distribution<%1%>&)";
+   constexpr auto function = "boost::math::skewness(const inverse_chi_squared_distribution<%1%>&)";
    if(df <= 6)
       return policies::raise_domain_error<RealType>(
          function,
@@ -356,10 +356,10 @@ inline RealType skewness(const inverse_chi_squared_distribution<RealType, Policy>& dist)
 
 template <class RealType, class Policy>
-inline RealType kurtosis(const inverse_chi_squared_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const inverse_chi_squared_distribution<RealType, Policy>& dist)
 {
    RealType df = dist.degrees_of_freedom();
-   static const char* function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)";
+   constexpr auto function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)";
    if(df <= 8)
       return policies::raise_domain_error<RealType>(
          function,
@@ -370,10 +370,10 @@ inline RealType kurtosis(const inverse_chi_squared_distribution<RealType, Policy>& dist)
 
 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const inverse_chi_squared_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const inverse_chi_squared_distribution<RealType, Policy>& dist)
 {
    RealType df = dist.degrees_of_freedom();
-   static const char* function = "boost::math::kurtosis(const inverse_chi_squared_distribution<%1%>&)";
+   constexpr auto function = "boost::math::kurtosis_excess(const inverse_chi_squared_distribution<%1%>&)";
    if(df <= 8)
       return policies::raise_domain_error<RealType>(
          function,
diff --git a/include/boost/math/distributions/inverse_gamma.hpp b/include/boost/math/distributions/inverse_gamma.hpp
index 8c9e4763d5..6aa798ed82 100644
--- a/include/boost/math/distributions/inverse_gamma.hpp
+++ b/include/boost/math/distributions/inverse_gamma.hpp
@@ -2,6 +2,7 @@
 
 // Copyright Paul A. Bristow 2010.
 // Copyright John Maddock 2010.
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -22,21 +23,21 @@
 // http://mathworld.wolfram.com/GammaDistribution.html
 // http://en.wikipedia.org/wiki/Gamma_distribution
 
+#include
+#include
+#include
 #include
 #include
 #include
 #include
-#include
-#include
-
 namespace boost{ namespace math
 {
 namespace detail
 {
 
 template <class RealType, class Policy>
-inline bool check_inverse_gamma_shape(
+BOOST_MATH_GPU_ENABLED inline bool check_inverse_gamma_shape(
      const char* function, // inverse_gamma
      RealType shape, // shape aka alpha
      RealType* result, // to update, perhaps with NaN
@@ -57,7 +58,7 @@ inline bool check_inverse_gamma_shape(
 } //bool check_inverse_gamma_shape
 
 template <class RealType, class Policy>
-inline bool check_inverse_gamma_x(
+BOOST_MATH_GPU_ENABLED inline bool check_inverse_gamma_x(
      const char* function,
      RealType const& x,
      RealType* result, const Policy& pol)
@@ -73,7 +74,7 @@ inline bool check_inverse_gamma_x(
 }
 
 template <class RealType, class Policy>
-inline bool check_inverse_gamma(
+BOOST_MATH_GPU_ENABLED inline bool check_inverse_gamma(
     const char* function, // TODO swap these over, so shape is first.
RealType scale, // scale aka beta RealType shape, // shape aka alpha @@ -92,7 +93,7 @@ class inverse_gamma_distribution using value_type = RealType; using policy_type = Policy; - explicit inverse_gamma_distribution(RealType l_shape = 1, RealType l_scale = 1) + BOOST_MATH_GPU_ENABLED explicit inverse_gamma_distribution(RealType l_shape = 1, RealType l_scale = 1) : m_shape(l_shape), m_scale(l_scale) { RealType result; @@ -101,12 +102,12 @@ class inverse_gamma_distribution l_scale, l_shape, &result, Policy()); } - RealType shape()const + BOOST_MATH_GPU_ENABLED RealType shape()const { return m_shape; } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { return m_scale; } @@ -132,27 +133,27 @@ inverse_gamma_distribution(RealType,RealType)->inverse_gamma_distribution -inline std::pair range(const inverse_gamma_distribution& /* dist */) +BOOST_MATH_GPU_ENABLED inline boost::math::pair range(const inverse_gamma_distribution& /* dist */) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(static_cast(0), max_value()); + return boost::math::pair(static_cast(0), max_value()); } template -inline std::pair support(const inverse_gamma_distribution& /* dist */) +BOOST_MATH_GPU_ENABLED inline boost::math::pair support(const inverse_gamma_distribution& /* dist */) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. using boost::math::tools::max_value; using boost::math::tools::min_value; - return std::pair(static_cast(0), max_value()); + return boost::math::pair(static_cast(0), max_value()); } template -inline RealType pdf(const inverse_gamma_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType pdf(const inverse_gamma_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::pdf(const inverse_gamma_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -195,17 +196,17 @@ inline RealType pdf(const inverse_gamma_distribution& dist, co } // pdf template -inline RealType logpdf(const inverse_gamma_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType logpdf(const inverse_gamma_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions using boost::math::lgamma; - static const char* function = "boost::math::logpdf(const inverse_gamma_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::logpdf(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); - RealType result = -std::numeric_limits::infinity(); + RealType result = -boost::math::numeric_limits::infinity(); if(false == detail::check_inverse_gamma(function, scale, shape, &result, Policy())) { // distribution parameters bad. 
return result; @@ -232,11 +233,11 @@ inline RealType logpdf(const inverse_gamma_distribution& dist, } // pdf template -inline RealType cdf(const inverse_gamma_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const inverse_gamma_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const inverse_gamma_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -260,12 +261,12 @@ inline RealType cdf(const inverse_gamma_distribution& dist, co } // cdf template -inline RealType quantile(const inverse_gamma_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const inverse_gamma_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions using boost::math::gamma_q_inv; - static const char* function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -287,11 +288,11 @@ inline RealType quantile(const inverse_gamma_distribution& dis } template -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const gamma_distribution<%1%>&, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -310,11 +311,11 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const inverse_gamma_distribution<%1%>&, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -338,11 +339,11 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const inverse_gamma_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::mean(const inverse_gamma_distribution<%1%>&)"; + constexpr auto function = "boost::math::mean(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -365,11 +366,11 @@ inline RealType mean(const inverse_gamma_distribution& dist) } // mean template -inline RealType variance(const inverse_gamma_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType variance(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::variance(const inverse_gamma_distribution<%1%>&)"; + constexpr auto function = "boost::math::variance(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -391,11 +392,11 @@ inline RealType variance(const inverse_gamma_distribution& dis } template -inline RealType mode(const inverse_gamma_distribution& 
dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::mode(const inverse_gamma_distribution<%1%>&)"; + constexpr auto function = "boost::math::mode(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -418,11 +419,11 @@ inline RealType mode(const inverse_gamma_distribution& dist) //} template -inline RealType skewness(const inverse_gamma_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::skewness(const inverse_gamma_distribution<%1%>&)"; + constexpr auto function = "boost::math::skewness(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -444,11 +445,11 @@ inline RealType skewness(const inverse_gamma_distribution& dis } template -inline RealType kurtosis_excess(const inverse_gamma_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const inverse_gamma_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::kurtosis_excess(const inverse_gamma_distribution<%1%>&)"; + constexpr auto function = "boost::math::kurtosis_excess(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -470,9 +471,9 @@ inline RealType kurtosis_excess(const inverse_gamma_distribution -inline RealType kurtosis(const inverse_gamma_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const inverse_gamma_distribution& dist) { - static const char* function = "boost::math::kurtosis(const inverse_gamma_distribution<%1%>&)"; + constexpr auto function = "boost::math::kurtosis(const inverse_gamma_distribution<%1%>&)"; RealType shape = dist.shape(); RealType scale = dist.scale(); diff --git a/include/boost/math/distributions/inverse_gaussian.hpp b/include/boost/math/distributions/inverse_gaussian.hpp index b31d1c9257..20d3b6bdd5 100644 --- a/include/boost/math/distributions/inverse_gaussian.hpp +++ b/include/boost/math/distributions/inverse_gaussian.hpp @@ -1,6 +1,6 @@ // Copyright John Maddock 2010. // Copyright Paul A. Bristow 2010. - +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -49,17 +49,17 @@ // http://www.statsci.org/s/inverse_gaussian.s and http://www.statsci.org/s/inverse_gaussian.html -//#include +#include +#include #include // for erf/erfc. #include #include #include #include // for gamma function - #include #include - -#include +#include +#include namespace boost{ namespace math{ @@ -70,10 +70,10 @@ class inverse_gaussian_distribution using value_type = RealType; using policy_type = Policy; - explicit inverse_gaussian_distribution(RealType l_mean = 1, RealType l_scale = 1) + BOOST_MATH_GPU_ENABLED explicit inverse_gaussian_distribution(RealType l_mean = 1, RealType l_scale = 1) : m_mean(l_mean), m_scale(l_scale) { // Default is a 1,1 inverse_gaussian distribution. 
- static const char* function = "boost::math::inverse_gaussian_distribution<%1%>::inverse_gaussian_distribution"; + constexpr auto function = "boost::math::inverse_gaussian_distribution<%1%>::inverse_gaussian_distribution"; RealType result; detail::check_scale(function, l_scale, &result, Policy()); @@ -81,22 +81,22 @@ class inverse_gaussian_distribution detail::check_x_gt0(function, l_mean, &result, Policy()); } - RealType mean()const + BOOST_MATH_GPU_ENABLED RealType mean()const { // alias for location. return m_mean; // aka mu } // Synonyms, provided to allow generic use of find_location and find_scale. - RealType location()const + BOOST_MATH_GPU_ENABLED RealType location()const { // location, aka mu. return m_mean; } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { // scale, aka lambda. return m_scale; } - RealType shape()const + BOOST_MATH_GPU_ENABLED RealType shape()const { // shape, aka phi = lambda/mu. return m_scale / m_mean; } @@ -119,29 +119,29 @@ inverse_gaussian_distribution(RealType,RealType)->inverse_gaussian_distribution< #endif template -inline std::pair range(const inverse_gaussian_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline boost::math::pair range(const inverse_gaussian_distribution& /*dist*/) { // Range of permissible values for random variable x, zero to max. using boost::math::tools::max_value; - return std::pair(static_cast(0.), max_value()); // - to + max value. + return boost::math::pair(static_cast(0.), max_value()); // - to + max value. } template -inline std::pair support(const inverse_gaussian_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline boost::math::pair support(const inverse_gaussian_distribution& /*dist*/) { // Range of supported values for random variable x, zero to max. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. using boost::math::tools::max_value; - return std::pair(static_cast(0.), max_value()); // - to + max value. + return boost::math::pair(static_cast(0.), max_value()); // - to + max value. 
 }
 
 template <class RealType, class Policy>
-inline RealType pdf(const inverse_gaussian_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const inverse_gaussian_distribution<RealType, Policy>& dist, const RealType& x)
 { // Probability Density Function
    BOOST_MATH_STD_USING // for ADL of std functions
    RealType scale = dist.scale();
    RealType mean = dist.mean();
    RealType result = 0;
-   static const char* function = "boost::math::pdf(const inverse_gaussian_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const inverse_gaussian_distribution<%1%>&, %1%)";
    if(false == detail::check_scale(function, scale, &result, Policy()))
    {
       return result;
@@ -171,14 +171,14 @@ inline RealType pdf(const inverse_gaussian_distribution<RealType, Policy>& dist,
 } // pdf
 
 template <class RealType, class Policy>
-inline RealType logpdf(const inverse_gaussian_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const inverse_gaussian_distribution<RealType, Policy>& dist, const RealType& x)
 { // Log of the Probability Density Function
    BOOST_MATH_STD_USING // for ADL of std functions
    RealType scale = dist.scale();
    RealType mean = dist.mean();
-   RealType result = -std::numeric_limits<RealType>::infinity();
-   static const char* function = "boost::math::logpdf(const inverse_gaussian_distribution<%1%>&, %1%)";
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();
+   constexpr auto function = "boost::math::logpdf(const inverse_gaussian_distribution<%1%>&, %1%)";
    if(false == detail::check_scale(function, scale, &result, Policy()))
    {
       return result;
@@ -198,7 +198,7 @@ inline RealType logpdf(const inverse_gaussian_distribution<RealType, Policy>& dist,
 
    if (x == 0)
    {
-      return std::numeric_limits<RealType>::quiet_NaN(); // Convenient, even if not defined mathematically. log(0)
+      return boost::math::numeric_limits<RealType>::quiet_NaN(); // Convenient, even if not defined mathematically. log(0)
    }
 
    const RealType two_pi = boost::math::constants::two_pi<RealType>();
@@ -208,13 +208,13 @@ inline RealType logpdf(const inverse_gaussian_distribution<RealType, Policy>& dist,
 } // logpdf
 
 template <class RealType, class Policy>
-inline RealType cdf(const inverse_gaussian_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const inverse_gaussian_distribution<RealType, Policy>& dist, const RealType& x)
 { // Cumulative Distribution Function.
    BOOST_MATH_STD_USING // for ADL of std functions.
    RealType scale = dist.scale();
    RealType mean = dist.mean();
-   static const char* function = "boost::math::cdf(const inverse_gaussian_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const inverse_gaussian_distribution<%1%>&, %1%)";
    RealType result = 0;
    if(false == detail::check_scale(function, scale, &result, Policy()))
    {
@@ -257,11 +257,11 @@
 template <class RealType, class Policy>
 struct inverse_gaussian_quantile_functor
 {
-   inverse_gaussian_quantile_functor(const boost::math::inverse_gaussian_distribution<RealType, Policy> dist, RealType const& p)
+   BOOST_MATH_GPU_ENABLED inverse_gaussian_quantile_functor(const boost::math::inverse_gaussian_distribution<RealType, Policy> dist, RealType const& p)
      : distribution(dist), prob(p)
    {
    }
-   boost::math::tuple<RealType, RealType> operator()(RealType const& x)
+   BOOST_MATH_GPU_ENABLED boost::math::tuple<RealType, RealType> operator()(RealType const& x)
    {
      RealType c = cdf(distribution, x);
      RealType fx = c - prob; // Difference cdf - value - to minimize.
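
[Editor's note: the functors above drive the derivative-based root finder used by quantile(): operator() returns the pair (cdf(x) - p, pdf(x)), i.e. the objective and its derivative. A minimal host-side sketch of that same pattern, with an illustrative guess and bracket rather than the library's internal guess_ig logic; this example is not part of the patch:

#include <boost/math/distributions/inverse_gaussian.hpp>
#include <boost/math/tools/roots.hpp>
#include <cstdint>
#include <iostream>
#include <limits>
#include <utility>

int main()
{
    const boost::math::inverse_gaussian dist(1.0, 2.0); // mean mu, scale lambda
    const double p = 0.75;

    // (objective, derivative) pair consumed by newton_raphson_iterate.
    auto f = [&](double x) {
        return std::make_pair(cdf(dist, x) - p, pdf(dist, x));
    };

    std::uintmax_t max_iter = 50;
    const double x = boost::math::tools::newton_raphson_iterate(
        f, 1.0 /*guess*/, 1e-8 /*min*/, 10.0 /*max*/,
        std::numeric_limits<double>::digits, max_iter);

    std::cout << x << " vs " << quantile(dist, p) << '\n'; // should agree
}

Bounding the iteration with min/max keeps Newton steps inside the (0, +inf) support even when the derivative is small in the tails.]
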
@@ -277,11 +277,11 @@ struct inverse_gaussian_quantile_functor template struct inverse_gaussian_quantile_complement_functor { - inverse_gaussian_quantile_complement_functor(const boost::math::inverse_gaussian_distribution dist, RealType const& p) + BOOST_MATH_GPU_ENABLED inverse_gaussian_quantile_complement_functor(const boost::math::inverse_gaussian_distribution dist, RealType const& p) : distribution(dist), prob(p) { } - boost::math::tuple operator()(RealType const& x) + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(RealType const& x) { RealType c = cdf(complement(distribution, x)); RealType fx = c - prob; // Difference cdf - value - to minimize. @@ -298,7 +298,7 @@ struct inverse_gaussian_quantile_complement_functor namespace detail { template - inline RealType guess_ig(RealType p, RealType mu = 1, RealType lambda = 1) + BOOST_MATH_GPU_ENABLED inline RealType guess_ig(RealType p, RealType mu = 1, RealType lambda = 1) { // guess at random variate value x for inverse gaussian quantile. BOOST_MATH_STD_USING using boost::math::policies::policy; @@ -350,14 +350,14 @@ namespace detail } // namespace detail template -inline RealType quantile(const inverse_gaussian_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const inverse_gaussian_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions. // No closed form exists so guess and use Newton Raphson iteration. RealType mean = dist.mean(); RealType scale = dist.scale(); - static const char* function = "boost::math::quantile(const inverse_gaussian_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const inverse_gaussian_distribution<%1%>&, %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) @@ -388,7 +388,7 @@ inline RealType quantile(const inverse_gaussian_distribution& // digits used to control how accurate to try to make the result. // To allow user to control accuracy versus speed, int get_digits = policies::digits();// get digits from policy, - std::uintmax_t max_iter = policies::get_max_root_iterations(); // and max iterations. + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); // and max iterations. using boost::math::tools::newton_raphson_iterate; result = newton_raphson_iterate(inverse_gaussian_quantile_functor(dist, p), guess, min, max, get_digits, max_iter); @@ -401,14 +401,14 @@ inline RealType quantile(const inverse_gaussian_distribution& } // quantile template -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions. 
RealType scale = c.dist.scale(); RealType mean = c.dist.mean(); RealType x = c.param; - static const char* function = "boost::math::cdf(const complement(inverse_gaussian_distribution<%1%>&), %1%)"; + constexpr auto function = "boost::math::cdf(const complement(inverse_gaussian_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) @@ -437,13 +437,13 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions RealType scale = c.dist.scale(); RealType mean = c.dist.mean(); - static const char* function = "boost::math::quantile(const complement(inverse_gaussian_distribution<%1%>&), %1%)"; + constexpr auto function = "boost::math::quantile(const complement(inverse_gaussian_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) return result; @@ -464,7 +464,7 @@ inline RealType quantile(const complemented2_type::digits; // Maximum possible binary digits accuracy for type T. // digits used to control how accurate to try to make the result. int get_digits = policies::digits(); - std::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); using boost::math::tools::newton_raphson_iterate; result = newton_raphson_iterate(inverse_gaussian_quantile_complement_functor(c.dist, q), guess, min, max, get_digits, max_iter); if (max_iter >= policies::get_max_root_iterations()) @@ -476,25 +476,25 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const inverse_gaussian_distribution& dist) { // aka mu return dist.mean(); } template -inline RealType scale(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType scale(const inverse_gaussian_distribution& dist) { // aka lambda return dist.scale(); } template -inline RealType shape(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType shape(const inverse_gaussian_distribution& dist) { // aka phi return dist.shape(); } template -inline RealType standard_deviation(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType standard_deviation(const inverse_gaussian_distribution& dist) { BOOST_MATH_STD_USING RealType scale = dist.scale(); @@ -504,7 +504,7 @@ inline RealType standard_deviation(const inverse_gaussian_distribution -inline RealType mode(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const inverse_gaussian_distribution& dist) { BOOST_MATH_STD_USING RealType scale = dist.scale(); @@ -515,7 +515,7 @@ inline RealType mode(const inverse_gaussian_distribution& dist } template -inline RealType skewness(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const inverse_gaussian_distribution& dist) { BOOST_MATH_STD_USING RealType scale = dist.scale(); @@ -525,7 +525,7 @@ inline RealType skewness(const inverse_gaussian_distribution& } template -inline RealType kurtosis(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const inverse_gaussian_distribution& dist) { RealType scale = dist.scale(); RealType mean = dist.mean(); @@ -534,7 +534,7 @@ inline RealType kurtosis(const 
inverse_gaussian_distribution& } template -inline RealType kurtosis_excess(const inverse_gaussian_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const inverse_gaussian_distribution& dist) { RealType scale = dist.scale(); RealType mean = dist.mean(); diff --git a/include/boost/math/distributions/landau.hpp b/include/boost/math/distributions/landau.hpp new file mode 100644 index 0000000000..129eca2879 --- /dev/null +++ b/include/boost/math/distributions/landau.hpp @@ -0,0 +1,4642 @@ +// Copyright Takuma Yoshimura 2024. +// Copyright Matt Borland 2024 +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_STATS_LANDAU_HPP +#define BOOST_STATS_LANDAU_HPP + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4127) // conditional expression is constant +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include +#include +#include +#include +#endif + +namespace boost { namespace math { +template +class landau_distribution; + +namespace detail { + +template +BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (x < 1) { + // Rational Approximation + // Maximum Relative Error: 6.1179e-18 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(2.62240126375351657026e-1), + static_cast(3.37943593381366824691e-1), + static_cast(1.53537606095123787618e-1), + static_cast(3.01423783265555668011e-2), + static_cast(2.66982581491576132363e-3), + static_cast(-1.57344124519315009970e-5), + static_cast(3.46237168332264544791e-7), + static_cast(2.54512306953704347532e-8), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.61596691542333069131e0), + static_cast(1.31560197919990191004e0), + static_cast(6.37865139714920275881e-1), + static_cast(1.99051021258743986875e-1), + static_cast(3.73788085017437528274e-2), + static_cast(3.72580876403774116752e-3), + }; + + result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x); + } + else if(x < 2){ + RealType t = x - 1; + + // Rational Approximation + // Maximum Relative Error: 2.1560e-17 + BOOST_MATH_STATIC const RealType P[6] = { + static_cast(1.63531240868022603476e-1), + static_cast(1.42818648212508067982e-1), + static_cast(4.95816076364679661943e-2), + static_cast(8.59234710489723831273e-3), + static_cast(5.76649181954629544285e-4), + static_cast(-5.66279925274108366994e-7), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.41478104966077351483e0), + static_cast(9.41180365857002724714e-1), + static_cast(3.65084346985789448244e-1), + static_cast(8.77396986274371571301e-2), + static_cast(1.24233749817860139205e-2), + static_cast(8.57476298543168142524e-4), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 4) { + RealType t = x - 2; + + // Rational Approximation + // Maximum Relative Error: 9.1732e-19 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(9.55242261334771588094e-2), + static_cast(6.66529732353979943139e-2), + static_cast(1.80958840194356287100e-2), + static_cast(2.34205449064047793618e-3), + static_cast(1.16859089123286557482e-4), + static_cast(-1.48761065213531458940e-7), + 
static_cast(4.37245276130361710865e-9), + static_cast(-8.10479404400603805292e-11), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.21670723402658089612e0), + static_cast(6.58224466688607822769e-1), + static_cast(2.00828142796698077403e-1), + static_cast(3.64962053761472303153e-2), + static_cast(3.76034152661165826061e-3), + static_cast(1.74723754509505656326e-4), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 8) { + RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 7.6621e-18 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(3.83643820409470770350e-2), + static_cast(1.97555000044256883088e-2), + static_cast(3.71748668368617282698e-3), + static_cast(3.04022677703754827113e-4), + static_cast(8.76328889784070114569e-6), + static_cast(-3.34900379044743745961e-9), + static_cast(5.36581791174380716937e-11), + static_cast(-5.50656207669255770963e-13), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(9.09290785092251223006e-1), + static_cast(3.49404120360701349529e-1), + static_cast(7.23730835206014275634e-2), + static_cast(8.47875744543245845354e-3), + static_cast(5.28021165718081084884e-4), + static_cast(1.33941126695887244822e-5), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 6.6311e-19 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(1.12656323880287532947e-2), + static_cast(2.87311140580416132088e-3), + static_cast(2.61788674390925516376e-4), + static_cast(9.74096895307400300508e-6), + static_cast(1.19317564431052244154e-7), + static_cast(-6.99543778035110375565e-12), + static_cast(4.33383971045699197233e-14), + static_cast(-1.75185581239955717728e-16), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(4.94430267268436822392e-1), + static_cast(1.00370783567964448346e-1), + static_cast(1.05989564733662652696e-2), + static_cast(6.04942184472254239897e-4), + static_cast(1.72741008294864428917e-5), + static_cast(1.85398104367945191152e-7), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 5.6459e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(2.83847488747490686627e-3), + static_cast(4.95641151588714788287e-4), + static_cast(2.79159792287747766415e-5), + static_cast(5.93951761884139733619e-7), + static_cast(3.89602689555407749477e-9), + static_cast(-4.86595415551823027835e-14), + static_cast(9.68524606019510324447e-17), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(3.01847536766892219351e-1), + static_cast(3.63152433272831196527e-2), + static_cast(2.20938897517130866817e-3), + static_cast(7.05424834024833384294e-5), + static_cast(1.09010608366510938768e-6), + static_cast(6.08711307451776092405e-9), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 6.5205e-17 + BOOST_MATH_STATIC const RealType P[6] = { + static_cast(6.85767880395157523315e-4), + static_cast(4.08288098461672797376e-5), + static_cast(8.10640732723079320426e-7), + static_cast(6.10891161505083972565e-9), + static_cast(1.37951861368789813737e-11), + 
static_cast(-1.25906441382637535543e-17), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.23722380864018634550e-1), + static_cast(6.05800403141772433527e-3), + static_cast(1.47809654123655473551e-4), + static_cast(1.84909364620926802201e-6), + static_cast(1.08158235309005492372e-8), + static_cast(2.16335841791921214702e-11), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(x) < 8) { + RealType t = log2(ldexp(x, -6)); + + // Rational Approximation + // Maximum Relative Error: 3.5572e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(6.78613480244945294595e-1), + static_cast(9.61675759893298556080e-1), + static_cast(3.45159462006746978086e-1), + static_cast(6.32803373041761027814e-2), + static_cast(6.93646175256407852991e-3), + static_cast(4.69867700169714338273e-4), + static_cast(1.76219117171149694118e-5), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.44693640094228656726e0), + static_cast(5.46298626321591162873e-1), + static_cast(1.01572892952421447864e-1), + static_cast(1.04982575345680980744e-2), + static_cast(7.65591730392359463367e-4), + static_cast(2.69383817793665674679e-5), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x); + } + else if (ilogb(x) < 16) { + RealType t = log2(ldexp(x, -8)); + + // Rational Approximation + // Maximum Relative Error: 5.7408e-17 + BOOST_MATH_STATIC const RealType P[9] = { + static_cast(6.51438485661317103070e-1), + static_cast(2.67941671074735988081e-1), + static_cast(5.18564629295719783781e-2), + static_cast(6.18976337233135940231e-3), + static_cast(5.08042228681335953236e-4), + static_cast(2.97268230746003939324e-5), + static_cast(1.24283200336057908183e-6), + static_cast(3.35670921544537716055e-8), + static_cast(5.06987792821954864905e-10), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(4.23792506680780833665e-1), + static_cast(8.17040643791396371682e-2), + static_cast(9.63961713981621216197e-3), + static_cast(8.06584713485725204135e-4), + static_cast(4.62050471704120102023e-5), + static_cast(1.96919734048024406173e-6), + static_cast(5.23890369587103685278e-8), + static_cast(7.99399970089366802728e-10), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x); + } + else if (ilogb(x) < 32) { + RealType t = log2(ldexp(x, -16)); + + // Rational Approximation + // Maximum Relative Error: 1.0195e-17 + BOOST_MATH_STATIC const RealType P[10] = { + static_cast(6.36745544906925230102e-1), + static_cast(2.06319686601209029700e-1), + static_cast(3.27498059700133287053e-2), + static_cast(3.30913729536910108000e-3), + static_cast(2.34809665750270531592e-4), + static_cast(1.21234086846551635407e-5), + static_cast(4.55253563898240922019e-7), + static_cast(1.17544434819877511707e-8), + static_cast(1.76754192209232807941e-10), + static_cast(-2.78616504641875874275e-17), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(3.24145654925686670201e-1), + static_cast(5.14350019501887110402e-2), + static_cast(5.19867984016649969928e-3), + static_cast(3.68798608372265018587e-4), + static_cast(1.90449594112666257344e-5), + static_cast(7.15068261954120746192e-7), + static_cast(1.84646096630493837656e-8), + static_cast(2.77636277083994601941e-10), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x); + } + else if (ilogb(x) < 64) { + RealType t 
= log2(ldexp(x, -32)); + + // Rational Approximation + // Maximum Relative Error: 8.0433e-17 + BOOST_MATH_STATIC const RealType P[9] = { + static_cast(6.36619776379492082324e-1), + static_cast(2.68158440168597706495e-1), + static_cast(5.49040993767853738389e-2), + static_cast(7.23458585096723552751e-3), + static_cast(6.85438876301780090281e-4), + static_cast(4.84561891424380633578e-5), + static_cast(2.82092117716081590941e-6), + static_cast(9.57557353473514565245e-8), + static_cast(5.16773829224576217348e-9), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + static_cast(1.), + static_cast(4.21222294324039934056e-1), + static_cast(8.62431574655015481812e-2), + static_cast(1.13640608906815986975e-2), + static_cast(1.07668486873466248474e-3), + static_cast(7.61148039258802068270e-5), + static_cast(4.43109262308946031382e-6), + static_cast(1.50412757354817481381e-7), + static_cast(8.11746432728995551732e-9), + }; + + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x); + } + else{ + result = 2 / (constants::pi() * x * x); + } + + return result; +} + + +template +BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (x < 1) { + // Rational Approximation + // Maximum Relative Error: 7.4629e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62240126375351657025589608183516471315e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.94698530837122818345222883832757839888e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06728003509081587907620543204047536319e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.41256254272104786752190871391781331271e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34420233794664437979710204055323742199e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55021337841765667713712845735938627884e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.90557752737535583908921594594761570259e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.89899202021818926241643215600800085123e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.19635143827754893815649685600837995626e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.90989458941330917626663002392683325107e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92038069341802550019371049232152823407e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.40251964644989324856906264776204142653e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.55873076454666680466531097660277995317e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.80771940886011613393622410616035955976e-13), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.35771004134750535117224809381897395331e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.37002484862962406489509174332580745411e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.40833952846707180337506160933176158766e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.81709029902887471895588386777029652661e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98824705588020901032379932614151640505e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.83767868823957223030472664574235892682e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35718995485026064249286377096427165287e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.37305148463792922843850823142976586205e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.06575764439154972544253668821920460826e-3), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07663693811543002088092708395572161856e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09711221791106684926377106608027279057e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91302186546138009232520527964387543006e-6), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x); + } + else if (x < 2) { + RealType t = x - 1; + + // Rational Approximation + // Maximum Relative Error: 6.6684e-38 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63531240868022603475813051802104652763e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.17803013130262393286657457221415701909e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77780575692956605214628767143941600132e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44224824965135546671876867759691622832e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.93294212655117265065191070995706405837e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16021988737209938284910541133167243163e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89245591723934954825306673917695058577e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09614731993308746343064543583426077485e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48578173962833046113032690615443901556e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.91098199913613774034789276073191721350e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.46788618410999858374206722394998550706e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.14296339768511312584670061679121003569e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.52631422678659858574974085885146420544e-15), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.48481735580594347909096198787726314434e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91598585888012869317473155570063821216e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12672162924784178863164220170459406872e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06981909640884405591730537337036849744e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.89767326897694369071250285702215471082e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05098647402530640576816174680275844283e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.10454903166951593161839822697382452489e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08850649343579977859251275585834901546e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.21168773136767495960695426112972188729e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21420361560900449851206650427538430926e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.84456961344035545134425261150891935402e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46462389440125559723382692664970874255e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 4) { + RealType t = x - 2; + + // Rational Approximation + // Maximum Relative Error: 6.3397e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.55242261334771588093967856464157010584e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48866040463435403672044647455806606078e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04241715667984551487882549843428953917e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.32030608366022483736940428739436921577e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17209924605508887793687609139940354371e-2), + 
            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.16808856405217460367038406337257561698e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75466331296758720822164534334356742122e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35657250222166360635152712608912585973e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28870137478821561164537700376942753108e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.07556331078347991810236646922418944687e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.18067019247793233704208913546277631267e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.96745094401496364651919224112160111958e-12),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07735872062601280828576861757316683396e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00667909426245388114411629440735066799e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18840123665979969294228925712434860653e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79233661359264185181083948452464063323e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38221013998193410441723488211346327478e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91365002115280149925615665651486504495e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.50379182630668701710656913597366961277e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.03946139315999749917224356955071595508e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95417998434227083224840824790387887539e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05109028829536837163462811783445124876e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.33125282515685091345480270760501403655e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.58127838888839012133236453180928291822e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.64781659622256824499981528095809140284e-12),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.0238e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83643820409470770350079809236512802618e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.02996762669868036727057860510914079553e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88220267784864518806154823373656292346e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.12677705163934102871251710968247891123e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.96642570169484318623869835991454809217e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04358807405587072010621764865118316919e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09461879230275452416933096674703383719e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06823998699058163165831211561331795518e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24129479811279469256914665585439417704e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01799222004929573125167949870797564244e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27744716755834439008073010185921331093e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.64210356143729930758657624381557123115e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11666384975358223644665199669986358056e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.30202644697506464624965700043476935471e-22),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44479208003384373099160875893986831861e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.54290037675901616362332580709754113529e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79815821498858750185823401350096868195e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01076480676864621093034009679744852375e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88607467767854661547920709472888000469e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51572461182263866462295745828009170865e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39843444671402317250813055670653845815e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60546478324160472036295355872288494327e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.25462551353792877506974677628167909695e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05915328498722701961972258866550409117e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20632869761578411246344533841556350518e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99438347491752820345051091574883391217e-12),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.2541e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12656323880287532946687856443190592955e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.31374972240605659239154788518240221417e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.10776910971729651587578902049263096117e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.53872632372452909103332647334935138324e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.81756147611150151751911596225474463602e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75302607308223110644722612796766590029e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33839913867469199941739467004997833889e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.32115127487193219555283158969582307620e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.90766547421015851413713511917307214275e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08939895797457378361211153362169024503e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.88597187949354708113046662952288249250e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62829447082637808482463811005771133942e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.65525705592205245661726488519562256000e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60698835222044786453848932477732972928e-26),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88605948104664828377228254521124685930e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.58594705700945215121673591119784576258e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.67113091918430152113322758216774649130e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39583889554372147091140765508385042797e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57139043074134496391251233307552940106e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26451960029396455805403758307828624817e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.30400557427446929311350088728080667203e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.99617890540456503276038942480115937467e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.50232186816498003232143065883536003942e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59310652872918546431499274822722004981e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82203579442241682923277858553949327687e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10345359368438386945407402887625511801e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55225829972215033873365516486524181445e-17),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.1276e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83847488747490686627461184914507143000e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.61220392257287638364190361688188696363e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42217711448675893329072184826328300776e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20597728166467972373586650878478687059e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.46404433551447410467051774706080733051e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27909145305324391651548849043874549520e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.33564789388635859003082815215888382619e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.18456219811686603951886248687349029515e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.92730718471866912036453008101994816885e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.51773776414973336511129801645901922234e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32371094281803507447435352076735970857e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44775294242071078601023962869394690897e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.94920633206242554892676642458535141153e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.18030442958390399095902441284074544279e-31),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.65871972115253665568580046072625013145e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.74531522538358367003224536101724206626e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20716479628426451344205712137554469781e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.83247584368619500260722365812456197226e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.52931189426842216323461406426803698335e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19343566926626449933230814579037896037e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.16243058880148231471744235009435586353e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21344088555713979086041331387697053780e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63246599173435592817113618949498524238e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43426263963680589288791782556801934305e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.62386317351298917459659548443220451300e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13281535580097407374477446521496074453e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27187882784316306216858933778750811182e-21),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.8458e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.85767880395157523314894776472286059373e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07684379950498990874449661385130414967e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.29181715091139597455177955800910928786e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78745116935613858188145093313446961899e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.61522707085521545633529621526418843836e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00989556810424018339768632204186394735e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94136605359672888838088037894401904574e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.15203266224687619299892471650072720579e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.25349098945982074415471295859193558426e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.31874620165906020409111024866737082384e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19330888204484008667352280840160186671e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.89951131249530265518610784629981482444e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35979606245171162602352579985003194602e-33),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.50946115943875327149319867495704969908e-36),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21212467547297045538111676107434471585e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17663841151156626845609176694801024524e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25478800461954401173897968683982253458e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.69831763649657690166671862562231448718e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19712058726935472913461138967922524612e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11423395018514913507624349385447326009e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.58664605420655866109404476637021322838e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15398721299264752103644541934654351463e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17567858878427250079920401604119982576e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.92808825029184923713064129493385469531e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.06007644624654848502783947087038305433e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.01246784499782934986619755015082182398e-23),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(x) < 8) {
+        RealType t = log2(ldexp(x, -6));
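+
+        // The octave branches that follow change variables: ldexp(x, -6)
+        // rescales [64, 256) onto [1, 4), so t = log2(x) - 6 lies in [0, 2),
+        // and the known x^-2 decay of the tail is factored out of each fit
+        // (note the evaluate_polynomial(Q, t) * x * x denominators below).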
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.6634e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.78613480244945294594505480426643613242e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07362312709864018864207848733814857157e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.47727521897653923649758175033206259109e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04183129813120998456717217121703605830e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09978729224187570508825456585418357590e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98739784100617344335742510102186570437e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08596635852958074572320481325030046975e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34947456497875218771996878497766058580e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31766866003171430205401377671093088134e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.29444683984117745298484117924452498776e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34885173277203843795065094551227568738e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30306828175920576070486704404727265760e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.05908347665846652276910544097430115068e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07218191317166728296013167220324207427e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.38908532499742180532814291654329829544e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63676664387672566455490461784630320677e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31302647779056928216789214742790688980e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.69477260342662648574925942030720482689e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.82918424748192763052497731722563414651e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69244295675395948278971027618145225216e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08928780307959133484802547123672997757e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.11055350627948183551681634293425028439e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22066081452382450191191677443527136733e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78025987104169227624653323808131280009e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.93164997733174955208299290433803918816e-13),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x);
+    }
+    else if (ilogb(x) < 16) {
+        RealType t = log2(ldexp(x, -8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.1919e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.51438485661317103069553924870169052838e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.29652867028564588922931020456447362877e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.90557738902930002845457640269863338815e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.47622170600415955276436226439948455362e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75198213226024095368607442455597948634e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73010116224706573149404022585502812698e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.33440551266376466187512220300943206212e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27556365758364667507686872656121131255e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.63395763346533783414747536236033733143e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75408632486279069728789506666930014630e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.74194099205847568739445023334735086627e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.52462367172968216583968200390021647482e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75367408334713835736514158797013854282e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.62633983586253025227038002631010874719e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46717630077826649018810277799043037738e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00642537643332236333695338824014611799e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47351714774371338348451112020520067028e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15896012319823666881998903857141624070e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.62176014448801854863922778456328119208e-25),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.79042471052521112984740498925369905803e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.55058068535501327896327971200536085268e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33143443551335870264469963604049242325e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75325348141376361676246108294525717629e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.28871858542582365161221803267369985933e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.23702867786056336210872367019916245663e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.14513776996445072162386201808986222616e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13763070277828149031445006534179375988e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75529866599039195417128499359378019030e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53029524184341515115464886126119582515e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.10598685541492162454676538516969294049e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75587930183994618721688808612207567233e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.83150895141383746641924725237948860959e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30675015193353451939138512698571954110e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.71774361582156518394662911172142577047e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.03397072601182597002547703682673198965e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.52666999314026491934445577764441483687e-20),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x);
+    }
+    else if (ilogb(x) < 32) {
+        RealType t = log2(ldexp(x, -16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2411e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36745544906925230101752563433306496000e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73688900814770369626527563956988302379e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.81718746296195151971617726268038570065e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.13663059680440438907042970413471861121e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.40004645275531255402942177790836798523e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.80059489775751412372432345156902685277e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47699576477278882708291693658669435536e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.45226121992756638990044029871581321461e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13406331882918393195342615955627442395e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46682598893946975917562485374893408094e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50450743907497671918301557074470352707e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.33121239192492785826422815650499088833e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05998176182038788839361491871608950696e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17857918044922309623941523489531919822e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.67865547879145051715131144371287619666e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.89654931108624296326740455618289840327e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.73017950634516660552375272495618707905e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68519137981001059472024985205381913202e-24),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.29948066505039082395951244410552705780e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.13730690908098361287472898564563217987e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27810872138103132689695155123062073221e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31948058845675193039732511839435290811e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06822729610151747708260147063757668707e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.03250522904270408071762059653475885811e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.85197262150009124871794386644476067020e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78139536405831228129042087771755615472e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.01642938314578533660138738069251610818e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36328724659833107203404258336776286146e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.80342430290059616305921915291683180697e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66502051110007556897014898713746069491e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42209715361911856322028597714105225748e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.77842582605458905635718323117222788078e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.69147628396460384758492682185049535079e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.43015110519230289924122344324563890953e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21788819753161690674882271896091269356e-24),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x);
+    }
+    else if (ilogb(x) < 64) {
+        RealType t = log2(ldexp(x, -32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.0348e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619776379492082323649724050601750141e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29818158612993476124594583743266388964e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.07736315744724186061845512973085067283e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72566458808745644851080213349673559756e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.01243670706840752914099834172565920736e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65306557791300593593488790517297048902e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41751291649776832705247036453540452119e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.26652535267657618112731521308564571490e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32742926765578976373764178875983383214e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.32948532312961882464151446137719196209e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96536595631611560703804402181953334762e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48463581600017734001916804890205661347e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.65588239861378749665334852913775575615e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39462290798829172203386678450961569536e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83049279786679854738508318703604392055e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87131679136229094080572090496960701828e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35977519905679446758726709186381481753e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.50639358104925465711435411537609380290e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.47593981758247424082096107205150226114e-40),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60997521267746350015610841742718472657e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.40470704349086277215167519790809981379e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.42305660178694704379572259575557934523e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.30272082429322808188807034927827414359e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.16742567294582284534194935923915261582e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22662407906450293978092195442686428843e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.84343501655116670387608730076359018869e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.65591734166216912475609790035240582537e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15131286290573570519912674341226377625e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08718962387679715644203327604824250850e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.04444946843492647477476784817227903589e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35966282749098189010715902284098451987e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19066854132814661112207991393498039851e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87533136296192957063599695937632598999e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93945754223094281767677343057286164777e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13592988790740273103099465658198617078e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.64942281110142621080966631872844557766e-26),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x);
+    }
+    else if (ilogb(x) < 128) {
+        RealType t = log2(ldexp(x, -64));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.3963e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619772367581344984274685280416528592e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72417390936686577479751162141499390532e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74319117326966091295365258834959120634e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.94269681742277805376258823511210253023e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09354876913180019634171748490068797632e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.46986612543101357465265079580805403382e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21726753043764920243710352514279216684e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29971756326232375757519588897328507962e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06770117983967828996891025614645348127e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27141668055392041978388268556174062945e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48383887723476619460217715361289178429e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.49530301203157403427315504054500005836e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18668427867427341566476567665953082312e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73377083349017331494144334612902128610e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.32380647653444581710582396517056104063e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.29865827039123699411352876626634361936e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07464506614287925844993490382319608619e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60173555862875972119871402681133785088e-23),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27912237038396638341492536677313983747e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.02138359905285600768927677649467546192e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24763589856532154099789305018886222841e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27133166772875885088000073325642460162e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01628419446817660009223289575239926907e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.62446834592284424116329218260348474201e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61238790103816844895453935630752859272e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67714109140674398508739253084218270557e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70952563202454851902810005226033501692e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33080865791583428494353408816388908148e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.06120545912923145572220606396715398781e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86403930600680015325844027465766431761e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.29419718354538719350803683985104818654e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.36261565790718847159482447247645891176e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46062982552515416754702177333530968405e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68804852250549346018535616711418533423e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51600033199082754845231795160728350588e-23),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x * x);
+    }
+    else {
+        result = 2 / (constants::pi<RealType>() * x * x);
+    }
+
+    return result;
+}
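+
+// A note on the scheme above: each finite interval [a, b) uses a rational
+// approximation P(t) / Q(t) in the shifted variable t = x - a, the octave
+// ranges use t = log2(x / 2^k) with the x^-2 tail factored out, and beyond
+// the last fitted range only the leading asymptotic term remains, i.e.
+// pdf(x) -> 2 / (pi * x^2) as x -> +infinity.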
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x >= -1) {
+        RealType t = x + 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.3928e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(2.21762208692280384264e-1),
+            static_cast<RealType>(7.10041055270973473923e-1),
+            static_cast<RealType>(8.66556480457430718380e-1),
+            static_cast<RealType>(4.78718713740071686348e-1),
+            static_cast<RealType>(1.03670563650247405820e-1),
+            static_cast<RealType>(4.31699263023057628473e-3),
+            static_cast<RealType>(1.72029926636215817416e-3),
+            static_cast<RealType>(-2.76271972015177236271e-4),
+            static_cast<RealType>(1.89483904652983701680e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(2.18155995697310361937e0),
+            static_cast<RealType>(2.53173077603836285217e0),
+            static_cast<RealType>(1.91802065831309251416e0),
+            static_cast<RealType>(9.94481663032480077373e-1),
+            static_cast<RealType>(3.72037148486473195054e-1),
+            static_cast<RealType>(8.85828240211801048938e-2),
+            static_cast<RealType>(1.41354784778520560313e-2),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x >= -2) {
+        RealType t = x + 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.4742e-18
+        BOOST_MATH_STATIC const RealType P[11] = {
+            static_cast<RealType>(6.50763682207511020789e-3),
+            static_cast<RealType>(5.73790055136022120436e-2),
+            static_cast<RealType>(2.22375662069496257066e-1),
+            static_cast<RealType>(4.92288611166073916396e-1),
+            static_cast<RealType>(6.74552077334695078716e-1),
+            static_cast<RealType>(5.75057550963763663751e-1),
+            static_cast<RealType>(2.85690710485234671432e-1),
+            static_cast<RealType>(6.73776735655426117231e-2),
+            static_cast<RealType>(3.80321995712675339999e-3),
+            static_cast<RealType>(1.09503400950148681072e-3),
+            static_cast<RealType>(-9.00045301380982997382e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[11] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.07919389927659014373e0),
+            static_cast<RealType>(2.56142472873207168042e0),
+            static_cast<RealType>(1.68357271228504881003e0),
+            static_cast<RealType>(2.23924151033591770613e0),
+            static_cast<RealType>(9.05629695159584880257e-1),
+            static_cast<RealType>(8.94372028246671579022e-1),
+            static_cast<RealType>(1.98616842716090037437e-1),
+            static_cast<RealType>(1.70142519339469434183e-1),
+            static_cast<RealType>(1.46288923980509020713e-2),
+            static_cast<RealType>(1.26171654901120724762e-2),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        const static RealType lambda_bias = static_cast<RealType>(1.45158270528945486473); // (= log(pi/2)+1)
+
+        RealType sigma = exp(-x * constants::pi<RealType>() / 2 - lambda_bias);
+        RealType s = exp(-sigma) * sqrt(sigma);
+
+        if (x >= -4) {
+            RealType t = -x - 2;
+
+            // Rational Approximation
+            // Maximum Relative Error: 5.8685e-18
+            BOOST_MATH_STATIC const RealType P[8] = {
+                static_cast<RealType>(6.31126317567898819465e-1),
+                static_cast<RealType>(5.28493759149515726917e-1),
+                static_cast<RealType>(3.28301410420682938866e-1),
+                static_cast<RealType>(1.31682639578153092699e-1),
+                static_cast<RealType>(3.86573798047656547423e-2),
+                static_cast<RealType>(7.77797337463414935830e-3),
+                static_cast<RealType>(9.97883658430364658707e-4),
+                static_cast<RealType>(6.05131104440018116255e-5),
+            };
+            BOOST_MATH_STATIC const RealType Q[8] = {
+                static_cast<RealType>(1),
+                static_cast<RealType>(8.47781139548258655981e-1),
+                static_cast<RealType>(5.21797290075642096762e-1),
+                static_cast<RealType>(2.10939174293308469446e-1),
+                static_cast<RealType>(6.14856955543769263502e-2),
+                static_cast<RealType>(1.24427885618560158811e-2),
+                static_cast<RealType>(1.58973907730896566627e-3),
+                static_cast<RealType>(9.66647686344466292608e-5),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -5.1328125) {
+            RealType t = -x - 4;
+
+            // Rational Approximation
+            // Maximum Relative Error: 3.2532e-17
+            BOOST_MATH_STATIC const RealType P[9] = {
+                static_cast<RealType>(6.26864481454444278646e-1),
+                static_cast<RealType>(5.10647753508714204745e-1),
+                static_cast<RealType>(1.98551443303285119497e-1),
+                static_cast<RealType>(4.71644854289800143386e-2),
+                static_cast<RealType>(7.71285919105951697285e-3),
+                static_cast<RealType>(8.93551020612017939395e-4),
+                static_cast<RealType>(6.97020145401946303751e-5),
+                static_cast<RealType>(4.17249760274638104772e-6),
+                static_cast<RealType>(7.73502439313710606153e-12),
+            };
+            BOOST_MATH_STATIC const RealType Q[8] = {
+                static_cast<RealType>(1),
+                static_cast<RealType>(8.15124079722976906223e-1),
+                static_cast<RealType>(3.16755852188961901369e-1),
+                static_cast<RealType>(7.52819418000330690962e-2),
+                static_cast<RealType>(1.23053506566779662890e-2),
+                static_cast<RealType>(1.42615273721494498141e-3),
+                static_cast<RealType>(1.11211928184477279204e-4),
+                static_cast<RealType>(6.65899898061789485757e-6),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else {
+            result = 0;
+        }
+    }
+
+    return result;
+}
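+
+// Left-tail note: with lambda_bias = log(pi/2) + 1, both precision variants
+// write the density through sigma = exp(-pi * x / 2 - lambda_bias) and the
+// prefactor s = exp(-sigma) * sqrt(sigma), which decays double-exponentially
+// as x -> -infinity; past the last fitted interval (x < -5.1328125 at double
+// precision, x < -6.875 at quad precision) the result is flushed to zero.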
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x >= -1) {
+        RealType t = x + 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2803e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21762208692280384264052188465103527015e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07121154108880017947709737976750200391e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34036993772851526455115746887751392080e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.06347688547967680654012636399459376006e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68662427153576049083876306225433068713e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67496398036468361727297056409545434117e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.69289909624425652939466055042210850769e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65649060232973461318206716040181929160e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.93006819232611588097575675157841312689e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34514211575975820725706925256381036061e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86184594939834946952489805173559003431e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66982890863184520310462776294335540260e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.28944885271022303878175622411438230193e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.47136245900831864668353768185407977846e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98034330388999615249606466662289782222e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.67931741921878993598048665757824165533e-12),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.81019852414657529520034272090632311645e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.51602582973416348091361820936922274106e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.87246706500788771729605610442552651673e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.55758863380051182011815572544985924963e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.16921634066377885762356020006515057786e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.28590978860106110644638308039189352463e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07182688002603587927920766666962846169e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.14413931232875917473403467095618397172e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59534588679183116305361784906322155131e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.62788361787003488572546802835677555151e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32291670834750583053201239125839728061e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97300476673137879475887158731166178829e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99801949382703479169010768105376163814e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09234481837537672361990844588166022791e-5),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x >= -2) {
+        RealType t = x + 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.8590e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.50763682207511020788551990942118742910e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.35160148798611192350830963080055471564e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.85567614778755464918744664468938413626e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24395902843792338723377508551415399267e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75803588325237557939443967923337822799e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44751743702858358960016891543930028989e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.38771920793989989423514808134997891434e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.99899457801652012757624005300136548027e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.59668432891116320233415536189782241116e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.02521376213276025040458141317737977692e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.00511857068867825025582508627038721402e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19031970665203475373248353773765801546e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.03203906044415590651592066934331209362e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01354553335348149914596284286907046333e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40077709279222086527834844446288408059e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.07036291955272673946830858788691198641e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75229595324028909877518859428663744660e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.51522041748753421579496885726802106514e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.28554063325397021905295499768922434904e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.55889733194498836168215560931863059152e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.45050534010127542130960211621894286688e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39437268390909980446225806216001154876e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85370557677145869100298813360909127310e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99358236671478050470186012149124879556e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82914467302553175692644992910876515874e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42426383410763382224410804289834740252e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.69477085497572590673874940261777949808e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.69832833104494997844651343499526754631e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95708391432781281454592429473451742972e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32541987059874996779040445020449508142e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.24889827757289516008834701298899804535e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76326709965329347689033555841964826234e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.19652942193884551681987290472603208296e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22987197033955835618810845653379470109e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51893290463268547258382709202599507274e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.43575882043846146581825453522967678538e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06683418138599962787868832158681391673e-5),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        const static RealType lambda_bias = BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.451582705289454864726195229894882143572); // (= log(pi/2)+1)
+
+        RealType sigma = exp(-x * constants::pi<RealType>() / 2 - lambda_bias);
+        RealType s = exp(-sigma) * sqrt(sigma);
+
+        if (x >= -4) {
+            RealType t = -x - 2;
+
+            // Rational Approximation
+            // Maximum Relative Error: 7.0019e-35
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[18] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.31126317567898819464557840628449107915e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.31008645911415314700225107327351636697e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.60743397071713227215207831174512626190e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69243936604887410595461520921270733657e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93778117053417749769040328795824088196e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.04718815412035890861219665332918840537e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41914050146414549019258775115663029791e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17147074474397510167661838243237386450e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31006358624990533313832878493963971249e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.31424805670861981190416637260176493218e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71604447221961082506919140038819715820e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.01796816886825676412069047911936154422e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.16975381608692872525287947181531051179e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47194712963929503930146780326366215579e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19469248860267489980690249379132289464e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22272545853285700254948346226514762534e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.05432616288832680241611577865488417904e-13),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08723511461992818779941378551362882730e-14),
+            };
+            BOOST_MATH_STATIC const RealType Q[16] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01021278581037282130358759075689669228e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.91783545335316986601746168681457332835e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.90025337163174587593060864843160047245e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.09029833197792884728968597136867674585e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44295160726145715084515736090313329125e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.46416375246465800703437031839310870287e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86521610039165178072099210670199368231e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.29830357713744587265637686549132688965e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32187562202835921333177458294507064946e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75034541113922116856456794810138543224e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79216314818261657918748858010817570215e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.69179323869133503169292092727333289999e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.09019623876540244217038375274802731869e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13900582194674129200395213522524183495e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92590979457175565666605415984496551246e-9),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -6.875) {
+            RealType t = -x - 4;
+
+            // Rational Approximation
+            // Maximum Relative Error: 6.4095e-35
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[18] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.26864481454444278645937156746132802908e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35052316263030534355724898036735352905e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46701697626917441774916114124028252971e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03805679118924248671851611170709699862e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.29457230118834515743802694404620370943e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04992992250026414994541561073467805333e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.21521951889983113700615967351903983850e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50611640491200231504944279876023072268e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.96007721851412367657495076592244098807e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.76876967456744990483799856564174838073e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.34285198828980523126745002596084187049e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.98811180672843179022928339476420108494e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36933707823930146448761204037985193905e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.76515121042989743198432939393805252169e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87259915481622487665138935922067520210e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34703958446785695676542385299325713141e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53199672688507288037695102377982544434e-12),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.97283413733676690377949556457649405210e-14),
+            };
+            BOOST_MATH_STATIC const RealType Q[18] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15492787140203223641846510939273526038e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34095796298757853634036909432345998054e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65650140652391522296109869665871008634e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.44894089102275258806976831589022821974e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27121975866547045393504246592187721233e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.91803733484503004520983723890062644122e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40341451263971324381655967408519161854e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72360046810103129487529493828280649599e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.60986435254173073868329335245110986549e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01216966786091058959421242465309838187e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11514619470960373138100691463949937779e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01639426441970732201346798259534312372e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.60411422906070056043690129326288757143e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.58398956202137709744885774931524547894e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14956902064425256856583295469934064903e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.23201118234279642321630988607491208515e-12),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43185798646451225275728735761433082676e-13),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else {
+            result = 0;
+        }
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53> &tag) {
+    if (x >= 0) {
+        return landau_pdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return landau_pdf_minus_imp_prec<RealType>(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>& tag) {
+    if (x >= 0) {
+        return landau_pdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return landau_pdf_minus_imp_prec<RealType>(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
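+
+// In the two dispatchers above, an argument of exactly zero satisfies x >= 0
+// and is routed to the plus-side implementation; only a NaN input fails both
+// ordered comparisons and falls through to the quiet_NaN return.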
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType landau_pdf_imp(const landau_distribution<RealType, Policy>& dist, const RealType& x) {
+    //
+    // This calculates the pdf of the Landau distribution.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::pdf(landau<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+    RealType bias = dist.bias();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0> tag_type;
+
+    static_assert(tag_type::value, "The Landau distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (i.e. 128-bit quad-floats).");
+
+    RealType u = (x - location) / scale + bias;
+
+    result = landau_pdf_imp_prec(u, tag_type()) / scale;
+
+    return result;
+}
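+
+// Usage sketch (illustrative; the (location, scale) constructor is assumed
+// from the dist.location() / dist.scale() accessors used above):
+//
+//   boost::math::landau_distribution<double> dist(0.0, 1.0);
+//   double density = boost::math::pdf(dist, 1.5); // resolves to landau_pdf_imp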
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 2.7348e-18
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(6.34761298487625202628e-1),
+            static_cast<RealType>(7.86558857265845597915e-1),
+            static_cast<RealType>(4.30220871807399303399e-1),
+            static_cast<RealType>(1.26410946316538340541e-1),
+            static_cast<RealType>(2.09346669713191648490e-2),
+            static_cast<RealType>(1.48926177023501002834e-3),
+            static_cast<RealType>(-5.93750588554108593271e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.65227304522196452589e0),
+            static_cast<RealType>(1.29276828719607419526e0),
+            static_cast<RealType>(5.93815051307098615300e-1),
+            static_cast<RealType>(1.69165968013666952456e-1),
+            static_cast<RealType>(2.84272940328510367574e-2),
+            static_cast<RealType>(2.28001970477820696422e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.1487e-17
+        BOOST_MATH_STATIC const RealType P[6] = {
+            static_cast<RealType>(4.22133240358047652363e-1),
+            static_cast<RealType>(3.48421126689016131480e-1),
+            static_cast<RealType>(1.15402429637790321091e-1),
+            static_cast<RealType>(1.90374044978864005061e-2),
+            static_cast<RealType>(1.26628667888851698698e-3),
+            static_cast<RealType>(-5.75103242931559285281e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.21277435324167238159e0),
+            static_cast<RealType>(6.38324046905267845243e-1),
+            static_cast<RealType>(1.81723381692749892660e-1),
+            static_cast<RealType>(2.80457012073363245106e-2),
+            static_cast<RealType>(1.93749385908189487538e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.2975e-17
+        BOOST_MATH_STATIC const RealType P[6] = {
+            static_cast<RealType>(2.95892137955791216378e-1),
+            static_cast<RealType>(2.29083899043580095868e-1),
+            static_cast<RealType>(7.09374171394372356009e-2),
+            static_cast<RealType>(1.08774274442674552229e-2),
+            static_cast<RealType>(7.69674715320139398655e-4),
+            static_cast<RealType>(1.63486840000680408991e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.09704883482087441931e0),
+            static_cast<RealType>(5.10139057077147935327e-1),
+            static_cast<RealType>(1.27055234007499238241e-1),
+            static_cast<RealType>(1.74542139987310825683e-2),
+            static_cast<RealType>(1.18944143641885993718e-3),
+            static_cast<RealType>(2.55296292914537992309e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.6740e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.73159318667565938776e-1),
+            static_cast<RealType>(6.95847424776057206679e-2),
+            static_cast<RealType>(1.04513924567165899506e-2),
+            static_cast<RealType>(6.35094718543965631442e-4),
+            static_cast<RealType>(1.04166111154771164657e-5),
+            static_cast<RealType>(1.43633490646363733467e-9),
+            static_cast<RealType>(-4.55493341295654514558e-11),
+            static_cast<RealType>(6.71119091495929467041e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(6.23409270429130114247e-1),
+            static_cast<RealType>(1.54791925441839372663e-1),
+            static_cast<RealType>(1.85626981728559445893e-2),
+            static_cast<RealType>(1.01414235673220405086e-3),
+            static_cast<RealType>(1.63385654535791481980e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.6772e-18
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(8.90469147411748292410e-2),
+            static_cast<RealType>(2.76033447621178662228e-2),
+            static_cast<RealType>(3.26577485081539607943e-3),
+            static_cast<RealType>(1.77755752909150255339e-4),
+            static_cast<RealType>(4.20716551767396206445e-6),
+            static_cast<RealType>(3.19415703637929092564e-8),
+            static_cast<RealType>(-1.79900915228302845362e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.36499987260915480890e-1),
+            static_cast<RealType>(7.67544181756713372678e-2),
+            static_cast<RealType>(6.83535263652329633233e-3),
+            static_cast<RealType>(3.15983778969051850073e-4),
+            static_cast<RealType>(6.84144567273078698399e-6),
+            static_cast<RealType>(5.00300197147417963939e-8),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.5678e-20
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(4.35157264931262089762e-2),
+            static_cast<RealType>(8.46833474333913742597e-3),
+            static_cast<RealType>(6.43769318301002170686e-4),
+            static_cast<RealType>(2.39440197089740502223e-5),
+            static_cast<RealType>(4.45572968892675484685e-7),
+            static_cast<RealType>(3.76071815793351687179e-9),
+            static_cast<RealType>(1.04851094362145160445e-11),
+            static_cast<RealType>(-8.50646541795105885254e-18),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(2.59832721225510968607e-1),
+            static_cast<RealType>(2.75929030381330309762e-2),
+            static_cast<RealType>(1.53115657043391090526e-3),
+            static_cast<RealType>(4.70173086825204710446e-5),
+            static_cast<RealType>(7.76185172490852556883e-7),
+            static_cast<RealType>(6.10512879655564540102e-9),
+            static_cast<RealType>(1.64522607881748812093e-11),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.2534e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(2.11253031965493064317e-2),
+            static_cast<RealType>(1.36656844320536022509e-3),
+            static_cast<RealType>(2.99036224749763963099e-5),
+            static_cast<RealType>(2.54538665523638998222e-7),
+            static_cast<RealType>(6.79286608893558228264e-10),
+            static_cast<RealType>(-6.92803349600061706079e-16),
+            static_cast<RealType>(5.47233092767314029032e-19),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(9.71506209641408410168e-2),
+            static_cast<RealType>(3.52744690483830496158e-3),
+            static_cast<RealType>(5.85142319429623560735e-5),
+            static_cast<RealType>(4.29686638196055795330e-7),
+            static_cast<RealType>(1.06586221304077993137e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(x) < 8) {
+        RealType t = log2(ldexp(x, -6));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.8057e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(6.60754766433212615409e-1),
+            static_cast<RealType>(2.47190065739055522599e-1),
+            static_cast<RealType>(4.17560046901040308267e-2),
+            static_cast<RealType>(3.71520821873148657971e-3),
+            static_cast<RealType>(2.03659383008528656781e-4),
+            static_cast<RealType>(2.52070598577347523483e-6),
+            static_cast<RealType>(-1.63741595848354479992e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(3.92836792184266080580e-1),
+            static_cast<RealType>(6.64332913820571574875e-2),
+            static_cast<RealType>(5.59456053716889879620e-3),
+            static_cast<RealType>(3.44201583106671507027e-4),
+            static_cast<RealType>(2.74554105716911980435e-6),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x);
+    }
+    else if (ilogb(x) < 16) {
+        RealType t = log2(ldexp(x, -8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.5585e-18
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(6.44802371584831601817e-1),
+            static_cast<RealType>(2.74177359656349204309e-1),
+            static_cast<RealType>(5.53659240731871433983e-2),
+            static_cast<RealType>(6.97653365560511851744e-3),
+            static_cast<RealType>(6.17058143529799037402e-4),
+            static_cast<RealType>(3.94979574476108021136e-5),
+            static_cast<RealType>(1.88315864113369221822e-6),
+            static_cast<RealType>(6.10941845734962836501e-8),
+            static_cast<RealType>(1.39403332890347813312e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.32345127287830884682e-1),
+            static_cast<RealType>(8.70500634789942065799e-2),
+            static_cast<RealType>(1.09253956356393590470e-2),
+            static_cast<RealType>(9.72576825490118007977e-4),
+            static_cast<RealType>(6.18656322285414147985e-5),
+            static_cast<RealType>(2.96375876501823390564e-6),
+            static_cast<RealType>(9.58622809886777038970e-8),
+            static_cast<RealType>(2.19059124630695181004e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x);
+    }
+    else if (ilogb(x) < 32) {
+        RealType t = log2(ldexp(x, -16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.4773e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(6.36685748306554972132e-1),
+            static_cast<RealType>(2.22217783148381285219e-1),
+            static_cast<RealType>(3.79173960692559280353e-2),
+            static_cast<RealType>(4.13394722917837684942e-3),
+            static_cast<RealType>(3.18141233442663766089e-4),
+            static_cast<RealType>(1.79745613243740552736e-5),
+            static_cast<RealType>(7.47632665728046334131e-7),
+            static_cast<RealType>(2.18258684729250152138e-8),
+            static_cast<RealType>(3.93038365129320422968e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(3.49087806008685701060e-1),
+            static_cast<RealType>(5.95568283529034601477e-2),
+            static_cast<RealType>(6.49386742119035055908e-3),
+            static_cast<RealType>(4.99721374204563274865e-4),
+            static_cast<RealType>(2.82348248031305043777e-5),
+            static_cast<RealType>(1.17436903872210815656e-6),
+            static_cast<RealType>(3.42841159307801319359e-8),
+            static_cast<RealType>(6.17382517100568714012e-10),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x);
+    }
+    else if (ilogb(x) < 64) {
+        RealType t = log2(ldexp(x, -32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.1441e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(6.36619774420718062663e-1),
+            static_cast<RealType>(2.68594096777677177874e-1),
+            static_cast<RealType>(5.50713044649497737064e-2),
+            static_cast<RealType>(7.26574134143434960446e-3),
+            static_cast<RealType>(6.89173530168387629057e-4),
+            static_cast<RealType>(4.87688310559244353811e-5),
+            static_cast<RealType>(2.84218580121660744969e-6),
+            static_cast<RealType>(9.65240367429172366675e-8),
+            static_cast<RealType>(5.21722720068664704240e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.21906621389193043384e-1),
+            static_cast<RealType>(8.65058026826346828750e-2),
+            static_cast<RealType>(1.14129998157398060009e-2),
+            static_cast<RealType>(1.08255124950652385121e-3),
+            static_cast<RealType>(7.66059006900869004871e-5),
+            static_cast<RealType>(4.46449501653114622960e-6),
+            static_cast<RealType>(1.51619602364037777665e-7),
+            static_cast<RealType>(8.19520132288940649002e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x);
+    }
+    else {
+        result = 2 / (constants::pi<RealType>() * x);
+    }
+
+    return result;
+}
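+
+// For the largest arguments the branch above returns 2 / (pi * x), which is
+// the integral from x to infinity of the density's leading tail term
+// 2 / (pi * t^2); this is consistent with landau_cdf_plus_imp_prec carrying
+// the upper-tail (complementary) probability.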
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 2.6472e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.34761298487625202628055609797763667089e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67589195401255255724121983550745957195e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07502511824371206858547365520593277966e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58354381655514028012912292026393699991e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.26470588572701739953294573496059174764e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.09494168186680012705692462031819276746e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.47385718073281027400744626077865581325e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69107567947502492044754464589464306928e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39641345689672620514703813504927833352e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27003930699448633502508661352994055898e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26124673422692247711088651516214728305e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.92103390710025598612731036700549416611e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.49572523814120679048097861755172556652e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.50719933268462244255954307285373705456e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05332427324361912631483249892199461926e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46280417679002004953145547112352398783e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.10429833573651169023447466152999802738e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.63535585818618617796313647799029559407e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24103322502244219003850826414302390557e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.38359438431541204276767900393091886363e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16687583686405832820912406970664239423e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31451667102532056871497958974899742424e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31646175307279119467894327494418625431e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.22334681489114534492425036698050444462e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86326948577818727263376488455223120476e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53867591038308710930446815360572461884e-7),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2387e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.22133240358047652363270514524313049653e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.35860518549481281929441026718420080571e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.89900189271177970319691370395978805326e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84682995288088652145572170736339265315e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.10748045562955323875797887939420022326e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00246325517647746481631710824413702051e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02998394686245118431020407235000441722e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.06095284318730009040434594746639110387e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91754425158654496372516241124447726889e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.37288564874584819097890713305968351561e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.77487285800889132325390488044487626942e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.41654614425073025870130302460301244273e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13058739144695658589427075788960660400e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11792528400843967390452475642793635419e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28794756779085737559146475126886069030e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.29339015472607099189295465796550367819e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53434372685847620864540166752049026834e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.17610372643685730837081191600424913542e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.64455304425865128680681864919048610730e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.62357689170951502920019033576939977973e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.89912258835489782923345357128779660633e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.76323449710934127736624596886862488066e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21524231900555452527639738371019517044e-8),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2281e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95892137955791216377776422765473500279e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.65957634570689820998348206103212047458e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.34657686985192350529330481818991619730e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43985500841002490334046057189458709493e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.09876223028004323158413173719329449720e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.04194660038290410425299531094974709019e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09780604136364125990393172827373829860e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02676079027875648517286351062161581740e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.30298199082321832830328345832636435982e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33633965123855006982811143987691483957e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46384114966020719170903077536685621119e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.07058773850795175564735754911699285828e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.76765053309825506619419451346428518606e-16),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89758965744489334954041814073547951925e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65985298582650601001220682594742473012e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.81017086203232617734714711306180675445e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14481301672800918591822984940714490526e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.90605450026850685321372623938646722657e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42447999818015246265718131846902731574e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83426770079526980292392341278413549820e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65073357441521690641768959521412898756e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.90437453546925074707222505750595530773e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.63458145595422196447107547750737429872e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.94457070577990681786301801930765271001e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.80986568964737305842778359322566801845e-11),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.3269e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73159318667565938775602634998889798568e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95149372103869634275319490207451722385e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.87504411659823400690797222216564651939e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.94571385159717824767058200278511014560e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.71656210265434934399632978675652106638e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.51248899957476233641240573020681464290e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.74490600490886011190565727721143414249e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.07537323853509621126318424069471060527e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59167561354023258538869598891502822922e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.72608361427131857269675430568328018022e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.54143016370650707528704927655983490119e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07936446902207128577031566135957311260e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.73506415766100115673754920344659223382e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66918494546396383814682000746818494148e-21),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34854858486201481385140426291984169791e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.25379978655428608198799717171321453517e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.01905621587554903438286661709763596137e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.31293339647901753103699339801273898688e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22793491714510746538048140924864505813e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45360205736839126407568005196865547577e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20918479556021574336548106785887700883e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.91450617548036413606169102407934734864e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59940586452863361281618661053014404930e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.93918243796178165623395356401173295690e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.14578198844767847381800490360878776998e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.25951924258762195043744665124187621023e-13),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.8719e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.90469147411748292410422813492550092930e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.20196598836093298098360769875443462143e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92579652651763461802771336515384878994e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.50439147419887323351995227585244144060e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13214742069751393867851080954754449610e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29648648382394801501422003194522139519e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.80420399625810952886117129805960917210e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.73059844436212109742132138573157222143e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66835461298243901306176013397428732836e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.61808423521250921041207160217989047728e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39300098366988229510997966682317724011e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09447823064238788960158765421669935819e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76962456941948786610101052244821659252e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.56004343709960620209823076030906442732e-25),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.22996422556023111037354479836605618488e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05244279198013248402385148537421114680e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72477335169177427114629223821992187549e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.76404568980852320252614006021707040788e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.79959793426748071158513573279263946303e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.93524788220877416643145672816678561612e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.46692111397574773931528693806744007042e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20764040991846422990601664181377937629e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84452460717254884659858711994943474216e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30317379590981344496250492107505244036e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83736710938966780518785861828424593249e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72943283576264035508862984899450025895e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77791087299927741360821362607419036797e-18),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.3269e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35157264931262089761621934621402648954e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51407493866635569361305338029611888082e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30886132894858313459359493329266696766e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType,
113, 2.02488735053241778868198537544867092626e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12676055870976203566712705442945186614e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.14136757304740001515364737551021389293e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01514327671186735593984375829685709678e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63783530594707477852365258482782354261e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67623013776194044717097141295482922572e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01397549144502050693284434189497148608e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.16720246008161901837639496002941412533e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70776057051329137176494230292143483874e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.02838174509144355795908173352005717435e-26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.07343581702278433243268463675468320030e-30), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13166129191902183515154099741529804400e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.58587877615076239769720197025023333190e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.14619910799508944167306046977187889556e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.66671218029939293563302720748492945618e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67192565058098643223751044962155343554e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.68397391192695060767615969382391508636e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94009678375859797198831431154760916459e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91901267471125881702216121486397689200e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83697721782125852878533856266722593909e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65409094893730117412328297801448869154e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.04202174160401885595563150562438901685e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.82104711207466136473754349696286794448e-20), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 1.0937e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11253031965493064317003259449214452745e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66886306590939856622089350675801752704e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77786526684921036345823450504680078696e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20343424607276252128027697088363135591e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29196073776799916444272401212341853981e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.92132293422644089278551376756604946339e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07465707745270914645735055945940815947e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.54424785613626844024154493717770471131e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74829439628215654062512023453584521531e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.03470347880592072854295353687395319489e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21113051919776165865529140783521696702e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.83719812218384126931626509884648891889e-24), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.41009036423458926116066353864843586169e-31), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.04965807080681693416200699806159303323e-34), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06133417626680943824361625182288165823e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87991711814130682492211639336942588926e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.98342969282034680444232201546039059255e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41940454945139684365514171982891170420e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.90481418770909949109210069475433304086e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25451856391453896652473393039014954572e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36349120987010174609224867075354225138e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94716367816033715208164909918572061643e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.67466470065187852967064897686894407151e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31509787633232139845762764472649607555e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93147765040455324545205202900563337981e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07370974123835247210519262324524537634e-23), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(x) < 8) { + RealType t = log2(ldexp(x, -6)); + + // Rational Approximation + // Maximum Relative Error: 3.1671e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.60754766433212615408805486898847664740e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.76143516602438873568296501921670869526e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.25254763859315398784817302471631188095e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.58650277225655302085863010927524053686e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92227773746592457803942136197158658110e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77170481512334811333255898903061802339e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.97864282716826576471164657368231427231e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.44243747123065035356982629201975914275e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54592817957461998135980337838429682406e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.52110831321633404722419425039513444319e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75030698219998735693228347424295850790e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.08894662488905377548940479566994482806e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11472306961184868827300852021969296872e-12), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04202386226609593823214781180612848612e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.07648212684952405730772649955008739292e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50646784687432427178774105515508540021e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02521400964223268224629095722841793118e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35374201758795213489427690294679848997e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.67290684433876221744005507243460683585e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83834977086311601362115427826807705185e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43236252790815493406777552261402865674e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17693398496807851224497995174884274919e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
4.34161978291568722756523120609497435933e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10819003833429876218381886615930538464e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75674945131892236663189757353419870796e-12), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x); + } + else if (ilogb(x) < 16) { + RealType t = log2(ldexp(x, -8)); + + // Rational Approximation + // Maximum Relative Error: 6.8517e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.44802371584831601817146389426921705500e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.32962058761590152378007743852342151897e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93461601407042255925193793376118641680e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.55533612685775705468614711945893908392e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76358919439168503100357154639460097607e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74131615534562303144125602950691629908e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.34470363614899824502654995633001232079e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29020592733459891982428815398092077306e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65794017754267756566941255128608603072e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78550878208007836345763926019855723350e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00524022519953193863682806155339574713e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.75977976583947697667784048133959750133e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89287460618943500291479647438555099783e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.27647727947590174240069836749437647626e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70582660582766959108625375415057711766e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.68499175244574169768386088971844067765e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84493774639724473576782806157757824413e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.03526708437207438952843827018631758857e-20), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.88770963972332750838571146142568699263e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.60273143287476497658795203149608758815e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34393017137025732376113353720493995469e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77086140458900002000076127880391602253e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30613589576665986239534705717153313682e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.25355055770024448240128702278455001334e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.16820392686312531160900884133254461634e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.17518073757769640772428097588524967431e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80469112215756035261419102003591533407e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57898123952200478396366475124854317231e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.47675840885248141425130440389244781221e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.97339213584115778189141444065113447170e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.85845602624148484344802432304264064957e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67971825826765902713812866354682255811e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.78795543918651402032912195982033010270e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 
113, 9.18166403020940538730241286150437447698e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.47995312365747437038996228794650773820e-20), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x); + } + else if (ilogb(x) < 32) { + RealType t = log2(ldexp(x, -16)); + + // Rational Approximation + // Maximum Relative Error: 6.5315e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36685748306554972131586673701426039950e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75892450098649456865500477195142009984e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.77167300709199375935767980419262418694e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.76657987434662206916119089733639111866e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.55003354250569146980730594644539195376e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.64102805555049236216024194001407792885e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36247488122195469059567496833809879653e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63710218182036673197103906176200862606e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.60629446465979003842091012679929186607e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22712292003775206105713577811447961965e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.28539646473359376707298867613704501434e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47893806904387088760579412952474847897e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.86809622035928392542821045232270554753e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47935154807866802001012566914901169147e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.40623855123515207599160827187101517978e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.60738876249485914019826585865464103800e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29257874466803586327275841282905821499e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.06781616867813418930916928811492801723e-31), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.33391038950576592915531240096703257292e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.06598147816889621749840662500099582486e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21997213397347849640608088189055469954e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18596259982438670449688554459343971428e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.86085649929528605647139297483281849158e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28178198640304770056854166079598253406e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57155234493390297220982397633114062827e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03771216318243964850930846579433365529e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.49837117625052865973189772546210716556e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.87302306206908338457432167186661027909e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32312467403555290915110093627622951484e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.50516769529388616534895145258103120804e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.03618834064000582669276279973634033592e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.49205787058220657972891114812453768100e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.23730041323226753771240724078738146658e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60115971929371066362271909482282503973e-23), + }; + // LCOV_EXCL_STOP + 
result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x); + } + else if (ilogb(x) < 64) { + RealType t = log2(ldexp(x, -32)); + + // Rational Approximation + // Maximum Relative Error: 1.0538e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619774420718062663274858007687066488e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30268560944740805268408378762250557522e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.09208091036436297425427953080968023835e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.74943166696408995577495065480328455423e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.03759498310898586326086395411203400316e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67261192787197720215143001944093963953e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42934578939412238889174695091726883834e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.32436794923711934610724023467723195718e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35077626369701051583611707128788137675e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.40836846523442062397035620402082560833e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98783806012035285862106614557391807137e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53869567415427145730376778932236900838e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.76521311340629419738016523643187305675e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41298928566351198899106243930173421965e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85552706372195482059144049293491755419e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89780987301820615664133438159710338126e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37965575161090804572561349091024723962e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.58944184323201470938493323680744408698e-26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35050162268451658064792430214910233545e-40), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61705010674524952791495931314010679992e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.42782564900556152436041716057503104160e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.46038982970912591009739894441944631471e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.34223936001873800295785537132905986678e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.19812900355867749521882613003222797586e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24521111398180921205229795007228494287e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.93429394949368809594897465724934596442e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69259071867341718986156650672535675726e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16370379759046264903196063336023488714e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.12248872253003553623419554868303473929e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.12936649424676303532477421399492615666e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37683645610869385656713212194971883914e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21951837982136344238516771475869548147e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91465509588513270823718962232280739302e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98107277753797219142748868489983891831e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.16715818685246698314459625236675887448e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.77987471623502330881961633434056523159e-26), + }; + // 
LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * x); + } + else if (ilogb(x) < 128) { + RealType t = log2(ldexp(x, -64)); + + // Rational Approximation + // Maximum Relative Error: 2.2309e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619772367581344040890134127619524371e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72522424358877592972375801826826390634e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74749058021341871895402838175268752603e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.95136532385982168410320513292144834602e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.10500792867575154180588502397506694341e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.48101840822895419487033057691746982216e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.22577211783918426674527460572438843266e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30499751609793470641331626931224574780e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07043764292750472900578756659402327450e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.28345765853059515246787820662932506931e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48838700086419232178247558529254516870e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.51015062047870581993810118835353083110e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19087584836023628483830612541904830502e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74405125338538967114280887107628943111e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.34493122865874905104884954420903910585e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.33794243240353561095702650271950891264e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07931799710240978706633227327649731325e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60935135769719557933955672887720342220e-23), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.28077223152164982690351137450174450926e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.02813709168750724641877726632095676090e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24899754437233214579860634420198464016e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27313166830073839667108783881090842820e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01803599095361490387188839658640162684e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.63782732057408167492988043000134055952e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.62068163155799642595981598061154626504e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68143951757351157612001096085234448512e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72843955600132743549395255544732133507e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33795283380674591910584657171640729449e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.08452802793967494363851669977089389376e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87062340827301546650149031133192913586e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.31034562935470222182311379138749593572e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.39579834094849668082821388907704276985e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46680056726416759571957577951115309094e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69538874529209016624246362786229032706e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52796320119313458991885552944744518437e-23), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / 
(tools::evaluate_polynomial(Q, t) * x);
+    }
+    else {
+        result = 2 / (constants::pi<RealType>() * x);
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x >= -1) {
+        RealType t = x + 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.8279e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(9.61609610406317335842e-2),
+            static_cast<RealType>(3.91836314722738553695e-1),
+            static_cast<RealType>(6.79862925205625107133e-1),
+            static_cast<RealType>(6.52516594941817706368e-1),
+            static_cast<RealType>(3.78594163612581127974e-1),
+            static_cast<RealType>(1.37741592243008345389e-1),
+            static_cast<RealType>(3.16100502353317199197e-2),
+            static_cast<RealType>(3.94935603975622336575e-3),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.76863983252615276767e0),
+            static_cast<RealType>(1.81486018095087241378e0),
+            static_cast<RealType>(1.17295504548962999723e0),
+            static_cast<RealType>(5.33998066342362562313e-1),
+            static_cast<RealType>(1.66508320794082632235e-1),
+            static_cast<RealType>(3.42192028846565504290e-2),
+            static_cast<RealType>(3.94691613177524994796e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x >= -2) {
+        RealType t = x + 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.3675e-17
+        BOOST_MATH_STATIC const RealType P[11] = {
+            static_cast<RealType>(7.07114056489178077423e-4),
+            static_cast<RealType>(7.35277969197058909845e-3),
+            static_cast<RealType>(3.45402694579204809691e-2),
+            static_cast<RealType>(9.62849773112695332289e-2),
+            static_cast<RealType>(1.75738736725818007992e-1),
+            static_cast<RealType>(2.18309266582058485951e-1),
+            static_cast<RealType>(1.85680388782727289455e-1),
+            static_cast<RealType>(1.06177394398691169291e-1),
+            static_cast<RealType>(3.94880388335722224211e-2),
+            static_cast<RealType>(9.46543177731050647162e-3),
+            static_cast<RealType>(1.50949646857411896396e-3),
+        };
+        BOOST_MATH_STATIC const RealType Q[11] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.19520021153535414164e0),
+            static_cast<RealType>(2.24057032777744601624e0),
+            static_cast<RealType>(1.63635577968560162720e0),
+            static_cast<RealType>(1.58952087228427876880e0),
+            static_cast<RealType>(7.63062254749311648018e-1),
+            static_cast<RealType>(4.65805990343825931327e-1),
+            static_cast<RealType>(1.45821531714775598887e-1),
+            static_cast<RealType>(5.42393925507104531351e-2),
+            static_cast<RealType>(9.84276292481407168381e-3),
+            static_cast<RealType>(1.54787649925009672534e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        const static RealType lambda_bias = static_cast<RealType>(1.45158270528945486473); // (= log(pi/2)+1)
+
+        RealType sigma = exp(-x * constants::pi<RealType>() / 2 - lambda_bias);
+        RealType s = exp(-sigma) / sqrt(sigma);
+
+        if (x >= -4) {
+            RealType t = -x - 2;
+
+            // Rational Approximation
+            // Maximum Relative Error: 6.6532e-17
+            BOOST_MATH_STATIC const RealType P[9] = {
+                static_cast<RealType>(3.71658823632747235572e-1),
+                static_cast<RealType>(2.81493346318174084721e-1),
+                static_cast<RealType>(1.80052521696460721846e-1),
+                static_cast<RealType>(7.65907659636944822120e-2),
+                static_cast<RealType>(2.33352148213280934280e-2),
+                static_cast<RealType>(5.02308701022480574067e-3),
+                static_cast<RealType>(6.29239919421134075502e-4),
+                static_cast<RealType>(8.36993181707604609065e-6),
+                static_cast<RealType>(-8.38295154747385945293e-6),
+            };
+            BOOST_MATH_STATIC const RealType Q[9] = {
+                static_cast<RealType>(1),
+                static_cast<RealType>(6.62107509936390708604e-1),
+                static_cast<RealType>(4.72501892305147483696e-1),
+                static_cast<RealType>(1.84446743813050604353e-1),
+                static_cast<RealType>(5.99971792581573339487e-2),
+                static_cast<RealType>(1.24751029844082800143e-2),
+                static_cast<RealType>(1.56705297654475773870e-3),
+                static_cast<RealType>(2.36392472352050487445e-5),
+                static_cast<RealType>(-2.11667044716450080820e-5),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -5.1328125) {
+            RealType t = -x - 4;
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.6331e-17
+            BOOST_MATH_STATIC const RealType P[10] = {
+                static_cast<RealType>(3.97500903816385095134e-1),
+                static_cast<RealType>(5.08559630146730380854e-1),
+                static_cast<RealType>(2.99190443368166803486e-1),
+                static_cast<RealType>(1.07339363365158174786e-1),
+                static_cast<RealType>(2.61694301269384158162e-2),
+                static_cast<RealType>(4.58386867966451237870e-3),
+                static_cast<RealType>(5.80610284231484509069e-4),
+                static_cast<RealType>(5.07249042503156949021e-5),
+                static_cast<RealType>(2.91644292826084281875e-6),
+                static_cast<RealType>(9.75453868235609527534e-12),
+            };
+            BOOST_MATH_STATIC const RealType Q[9] = {
+                static_cast<RealType>(1),
+                static_cast<RealType>(1.27376091725485414303e0),
+                static_cast<RealType>(7.49829208702328578188e-1),
+                static_cast<RealType>(2.69157374996960976399e-1),
+                static_cast<RealType>(6.55795320040378662663e-2),
+                static_cast<RealType>(1.14912646428788757804e-2),
+                static_cast<RealType>(1.45541420582309879973e-3),
+                static_cast<RealType>(1.27135040794481871472e-4),
+                static_cast<RealType>(7.31138551538712031061e-6),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else {
+            result = 0;
+        }
+    }
+
+    return result;
+}
+
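For reference, the tail regimes outside the rational-approximation windows reduce to closed forms that can be read directly off the branches above (the quad-precision twin below uses the same scaffolding), with sigma and lambda_bias exactly as defined in the code and t the per-branch shifted argument:

```latex
% Right tail: closing branch of landau_cdf_plus_imp_prec
1 - F(x) \;\longrightarrow\; \frac{2}{\pi x}, \qquad x \to +\infty
% Left tail: prefactor used by landau_cdf_minus_imp_prec
\sigma(x) = \exp\!\left(-\frac{\pi x}{2} - 1 - \ln\frac{\pi}{2}\right),
\qquad F(x) \;\approx\; \frac{e^{-\sigma}}{\sqrt{\sigma}} \cdot \frac{P(t)}{Q(t)}
```

Since sigma grows exponentially as x goes to minus infinity, F decays double-exponentially there, which is why the final branch can return 0 outright at this precision.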
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x >= -1) {
+        RealType t = x + 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2055e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.61609610406317335842332400044553397267e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74152295981095898203847178356629061821e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58642905042588731020840168744866124345e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69370085525311304330141932309908104187e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14888713497930800611167630826754270499e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69123861559106636252620023643265102867e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74273532954853421626852458737661546439e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.73534665976007761924923962996725209700e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42543389723715037640714282663089570985e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05120903211852044362181935724880384488e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49586169587615171270941258051088627885e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46047939521303565932576405363107506886e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.68248726161641913236972878212857788320e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.60663638253775180681171554635861859625e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76463460016745893121574217030494989443e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.08380585744336744543979680558024295296e-12),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66458574743150749245922924142120646408e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.87010262350733534202724862784081296105e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.51107149980251214963849267707173045433e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.71158207369578457239679595370389431171e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.37188705505573668092513124472448362633e0),
+
BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95647530096628718695081507038921183627e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30278895428001081342301218278371140110e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.61322060563420594659487640090297303892e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30529729106312748824241317854740876915e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.90465740298431311519387111139787971960e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.92760416706194729215037805873466599319e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02070496615845146626690561655353212151e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72080705566714681586449384371609107346e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76433504120625478720883079263866245392e-6), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -2) { + RealType t = x + 2; + + // Rational Approximation + // Maximum Relative Error: 3.4133e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07114056489178077422539043012078031613e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.18006784954579394004360967455655021959e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.60309646092161147676756546417366564213e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13479499932401667065782086621368143322e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.68587439643060549883916236839613331692e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12366494749830793876926914920462629077e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70739124754664545339208363069646589169e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04073482998938337661285862393345731336e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94833709787596305918524943438549684109e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50214412821697972546222929550410139790e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.43105005523280337071698704765973602884e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.85396789833278250392015217207198739243e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05690359993570736607428746439280858381e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.17297815188944531843360083791153470475e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.03913601629627587800587620822216769010e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.61963034255210565218722882961703473760e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.99502258440875586452963094474829571000e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.66563884565518965562535171848480872267e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.23954896921292896539048530795544784261e-6), + }; + BOOST_MATH_STATIC const RealType Q[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77934931846682015134812629288297137499e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.85052416252910403272283619201501701345e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45276747409453182009917448097687214033e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87717215449690275562288513806049961791e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96583424263422661540930513525639950307e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73001838976297286477856104855182595364e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29209801725936746054703603946844929105e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.31809396176316042818100839595926947461e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
4.62125101720695030847208519302530333864e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22912823173107974750307098204717046200e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.28404310708078592866397210871397836013e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.33433860799478110495440617696667578486e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01779942752411055394079990371203135494e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.60870827161929649807734240735205100749e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.43275518144078080917466090587075581039e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.80287554756375373913082969626543154342e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00697535360590561244468004025972321465e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.23883308105457761862174623664449205327e-6), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + const static RealType lambda_bias = BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.451582705289454864726195229894882143572); // (= log(pi/2)+1) + + RealType sigma = exp(-x * constants::pi() / 2 - lambda_bias); + RealType s = exp(-sigma) / sqrt(sigma); + + if (x >= -4) { + RealType t = -x - 2; + + // Rational Approximation + // Maximum Relative Error: 9.2619e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.71658823632747235572391863987803415545e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20402452680758356732340074285765302037e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53870483364594487885882489517365212394e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.73525449564340671962525942038149851804e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.67339872142847248852186397385576389802e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.60644488744851390946293970736919678433e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33042051950636491987775324999025538357e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13846819893538329440033115143593487041e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41648498082970622389678372669789346515e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.74006867625631068946791714035394785978e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.12238896415831258936563475509362795783e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88070293465108791701905953972140154151e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.24813015654516014181209691083399092303e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.64092873079064926551281731026589848877e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.09892207654972883190432072151353819511e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.86990125202059013860642688739159455800e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62986611607135348214220687891374676368e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07567013469555215514702758084138467446e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.84619752008414239602732630339626773669e-14), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.28669950018285475182750690468224641923e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.12421557061005325313661189943328446480e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68376064122323574208976258468929505299e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22010354939562426718305463635398985290e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
5.13795955314742199207524303721722785075e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.90452274425830801819532524004271355513e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.38324283887272345859359008873739301544e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15232844484261129757743512155821350773e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.79562237779621711674853020864686436450e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64370777996591099856555782918006739330e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.02327782881305686529414731684464770990e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27181015755595543140221119020333695667e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01121287947061613072815935956604529157e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44038164966032378909755215752715620878e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.39138685106442954199109662617641745618e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.83317957765031605023198891326325990178e-10), + }; + // LCOV_EXCL_STOP + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -6.875) { + RealType t = -x - 4; + + // Rational Approximation + // Maximum Relative Error: 4.9208e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[20] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97500903816385095134217223320239082420e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02058997410109156148729828665298333233e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30492992901887465108077581566548743407e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08695332228530157560495896731847709498e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.54469321766529692240388930552986490213e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00543201281990041935310905273146022998e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08633547932070289660163851972658637916e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15432691192536747268886307936712580254e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.46179071338871656505293487217938889935e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45295210106393905833273975344579255175e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34638105523514101671944454719592801562e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15786069528793080046638424661219527619e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.54781306296697568446848038567723598851e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31977279631544580423883461084970429143e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.56616743805004179430469197497030496870e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60913959062328670735884196858280987356e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.91123354712008822789348244888916948822e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82453513391091361890763400931018529659e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.12671859603774617133607658779709622453e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03211544596001317143519388487481133891e-20), + }; + BOOST_MATH_STATIC const RealType Q[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.56188463983858614833914386500628633184e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27273165410457713017446497319550252691e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72495122287308474449946195751088057230e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64049710819255633163836824600620426349e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
7.53329810455612298967902432399110414761e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72302144446588066369304547920758875106e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.90680157119357595265085115978578965640e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87039785683949322939618337154059874729e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.64199530594973983893552925652598080310e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.88147828823178863054226159776600116931e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.91569503818223078110818909039307983575e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.89289385694964650198403071737653842880e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.32154679053642509246603754078168127853e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.43239674842248090516375370051832849701e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.03349866320207008385913232167927124115e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98307302768178927108235662166752511325e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07996042577029996321821937863373306901e-12),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53576500935979732855511826033727522138e-13),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else {
+            result = 0;
+        }
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_imp_prec(const RealType& x, bool complement, const boost::math::integral_constant<int, 53>& tag) {
+    if (x >= 0) {
+        return complement ? landau_cdf_plus_imp_prec(x, tag) : 1 - landau_cdf_plus_imp_prec(x, tag);
+    }
+    else if (x <= 0) {
+        return complement ? 1 - landau_cdf_minus_imp_prec(x, tag) : landau_cdf_minus_imp_prec(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_imp_prec(const RealType& x, bool complement, const boost::math::integral_constant<int, 113>& tag) {
+    if (x >= 0) {
+        return complement ? landau_cdf_plus_imp_prec(x, tag) : 1 - landau_cdf_plus_imp_prec(x, tag);
+    }
+    else if (x <= 0) {
+        return complement ? 1 - landau_cdf_minus_imp_prec(x, tag) : landau_cdf_minus_imp_prec(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
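The precision selection in the two overloads above is ordinary tag dispatch on an integral_constant; a self-contained sketch of the same mechanism, with hypothetical names standing in for the Boost.Math machinery:

```cpp
#include <iostream>
#include <limits>
#include <type_traits>

// Stand-ins for the precision-specific implementations, each of which
// would carry its own coefficient tables (names are illustrative only).
template <typename Real>
Real eval_impl(Real x, std::integral_constant<int, 53>)  { return x; }  // double-width tables
template <typename Real>
Real eval_impl(Real x, std::integral_constant<int, 113>) { return -x; } // quad-width tables

template <typename Real>
Real eval(Real x)
{
    // Pick the overload at compile time from the mantissa width.
    using tag = std::integral_constant<int,
        std::numeric_limits<Real>::digits <= 53 ? 53 : 113>;
    return eval_impl(x, tag{});
}

int main()
{
    std::cout << eval(1.0) << '\n';  // double: 53-bit overload
    std::cout << eval(1.0L) << '\n'; // x86 long double (64-bit mantissa): 113-bit overload
}
```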
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType landau_cdf_imp(const landau_distribution<RealType, Policy>& dist, const RealType& x, bool complement) {
+    //
+    // This calculates the cdf of the Landau distribution and/or its complement.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::cdf(landau<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+    RealType bias = dist.bias();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Landau distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    RealType u = (x - location) / scale + bias;
+
+    result = landau_cdf_imp_prec(u, complement, tag_type());
+
+    return result;
+}
+
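To see the whole path exercised end to end, here is a sketch of how the public interface reaches landau_cdf_imp; it assumes the usual Boost.Math distribution conventions (cdf, complement) and the landau_distribution defaults of location 0 and scale 1:

```cpp
#include <boost/math/distributions/landau.hpp>
#include <iostream>

int main()
{
    boost::math::landau_distribution<double> dist; // location = 0, scale = 1 assumed

    // cdf(dist, x) calls landau_cdf_imp with complement == false.
    std::cout << boost::math::cdf(dist, 2.0) << '\n';

    // cdf(complement(dist, x)) sets complement == true, so the right tail
    // is evaluated directly by landau_cdf_plus_imp_prec rather than as
    // 1 - F(x), avoiding cancellation for large x.
    std::cout << boost::math::cdf(boost::math::complement(dist, 2.0)) << '\n';
}
```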
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_lower_imp_prec(const RealType& p, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.375) {
+        RealType t = p - static_cast<RealType>(0.375);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 3.0596e-17
+        BOOST_MATH_STATIC const RealType P[6] = {
+            static_cast<RealType>(3.74557416577759554506e-2),
+            static_cast<RealType>(3.87808262376545756299e0),
+            static_cast<RealType>(4.03092288183382979104e0),
+            static_cast<RealType>(-1.65221829710249468257e1),
+            static_cast<RealType>(-6.99689838230114367276e0),
+            static_cast<RealType>(1.51123479911771488314e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.37863773851525662884e-1),
+            static_cast<RealType>(-6.35020262707816744534e0),
+            static_cast<RealType>(3.07646508389502660442e-1),
+            static_cast<RealType>(9.72566583784248877260e0),
+            static_cast<RealType>(-2.72338088170674280735e0),
+            static_cast<RealType>(-1.58608957980133006476e0),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - static_cast<RealType>(0.25);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 5.2780e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(-4.17764764050720190117e-1),
+            static_cast<RealType>(1.27887601021900963655e0),
+            static_cast<RealType>(1.80329928265996817279e1),
+            static_cast<RealType>(2.35783605878556791719e1),
+            static_cast<RealType>(-2.67160590411398800149e1),
+            static_cast<RealType>(-2.36192101013335692266e1),
+            static_cast<RealType>(8.30396110938939237358e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(5.37459525158081633669e0),
+            static_cast<RealType>(2.35696607501498012129e0),
+            static_cast<RealType>(-1.71117034150268575909e1),
+            static_cast<RealType>(-6.72278235529877170403e0),
+            static_cast<RealType>(1.27763043804603299034e1),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - static_cast<RealType>(0.125);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 6.3254e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-8.77109518013577785811e-1),
+            static_cast<RealType>(-1.03442936529923615496e1),
+            static_cast<RealType>(-1.03389868296950570121e1),
+            static_cast<RealType>(2.01575691867458616553e2),
+            static_cast<RealType>(4.59115079925618829199e2),
+            static_cast<RealType>(-3.38676271744958577802e2),
+            static_cast<RealType>(-5.38213647878547918506e2),
+            static_cast<RealType>(1.99214574934960143349e2),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.64177607733998839003e1),
+            static_cast<RealType>(8.10042194014991761178e1),
+            static_cast<RealType>(7.61952772645589839171e1),
+            static_cast<RealType>(-2.52698871224510918595e2),
+            static_cast<RealType>(-1.95365983250723202416e2),
+            static_cast<RealType>(2.61928845964255538379e2),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.5192e-18
+        BOOST_MATH_STATIC const RealType P[6] = {
+            static_cast<RealType>(-8.77109518013577852585e-1),
+            static_cast<RealType>(-1.08703720146608358678e0),
+            static_cast<RealType>(-4.34198537684719253325e-1),
+            static_cast<RealType>(-6.97264194535092564620e-2),
+            static_cast<RealType>(-4.20721933993302797971e-3),
+            static_cast<RealType>(-6.27420063107527426396e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(8.38688797993971740640e-1),
+            static_cast<RealType>(2.47558526682310722526e-1),
+            static_cast<RealType>(3.03952783355954712472e-2),
+            static_cast<RealType>(1.39226078796010665644e-3),
+            static_cast<RealType>(1.43993679246435688244e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.1196e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-1.16727296241754548410e0),
+            static_cast<RealType>(-1.12325365855062172009e0),
+            static_cast<RealType>(-3.96403456954867129566e-1),
+            static_cast<RealType>(-6.50024588048629862189e-2),
+            static_cast<RealType>(-5.08582387678609504048e-3),
+            static_cast<RealType>(-1.71657051345258316598e-4),
+            static_cast<RealType>(-1.81536405273085024830e-6),
+            static_cast<RealType>(-9.65262938333207656548e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(7.55271574611337871389e-1),
+            static_cast<RealType>(2.16323131117540100488e-1),
+            static_cast<RealType>(2.92693206540519768049e-2),
+            static_cast<RealType>(1.89396907936678571916e-3),
+            static_cast<RealType>(5.20017914327360594265e-5),
+            static_cast<RealType>(4.18896774212993675707e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.0763e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-1.78348038398799868409e0),
+            static_cast<RealType>(-7.74779087785346936524e-1),
+            static_cast<RealType>(-1.27121601027522656374e-1),
+            static_cast<RealType>(-9.86675785835385622362e-3),
+            static_cast<RealType>(-3.69510132425310943600e-4),
+            static_cast<RealType>(-6.00811940375633438805e-6),
+            static_cast<RealType>(-3.06397799506512676163e-8),
+            static_cast<RealType>(-7.34821360521886161256e-12),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(3.76606062137668223823e-1),
+            static_cast<RealType>(5.37821995022686641494e-2),
+            static_cast<RealType>(3.62736078766811383733e-3),
+            static_cast<RealType>(1.16954398984720362997e-4),
+            static_cast<RealType>(1.59917906784160311385e-6),
+            static_cast<RealType>(6.41144889614705503307e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.9936e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-2.32474749499506229415e0),
+            static_cast<RealType>(-4.81681429397597263092e-1),
+            static_cast<RealType>(-3.79696253130015182335e-2),
+            static_cast<RealType>(-1.42328672650093755545e-3),
+            static_cast<RealType>(-2.58335052925986849305e-5),
+            static_cast<RealType>(-2.03945574260603170161e-7),
+            static_cast<RealType>(-5.04229972664978604816e-10),
+            static_cast<RealType>(-5.49506755992282162712e-14),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.87186049570056737301e-1),
+            static_cast<RealType>(1.32852903862611979806e-2),
+            static_cast<RealType>(4.45262195863310928309e-4),
+            static_cast<RealType>(7.13306978839226580931e-6),
+            static_cast<RealType>(4.84555343060572391776e-8),
+            static_cast<RealType>(9.65086092007764297450e-11),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -64) {
+        RealType t = -log2(ldexp(p, 32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.2449e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-2.82318656228158372998e0),
+            static_cast<RealType>(-2.84346379198027589453e-1),
+            static_cast<RealType>(-1.09194719815749710073e-2),
+            static_cast<RealType>(-1.99728160102967185378e-4),
+            static_cast<RealType>(-1.77069359938827653381e-6),
+            static_cast<RealType>(-6.82828539186572955883e-9),
+            static_cast<RealType>(-8.22634582905944543176e-12),
+            static_cast<RealType>(-4.10585514777842307175e-16),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(9.29910333991046040738e-2),
+            static_cast<RealType>(3.27860300729204691815e-3),
+            static_cast<RealType>(5.45852206475929614010e-5),
+            static_cast<RealType>(4.34395271645812189497e-7),
+            static_cast<RealType>(1.46600782366946777467e-9),
+            static_cast<RealType>(1.45083131237841500574e-12),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -128) {
+        RealType t = -log2(ldexp(p, 64));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.6453e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-3.29700011190686231229e0),
+            static_cast<RealType>(-1.62920309130909343601e-1),
+            static_cast<RealType>(-3.07152472866757852259e-3),
+            static_cast<RealType>(-2.75922040607620211449e-5),
+            static_cast<RealType>(-1.20144242264703283024e-7),
+            static_cast<RealType>(-2.27410079849018964454e-10),
+            static_cast<RealType>(-1.34109445298156050256e-13),
+            static_cast<RealType>(-3.08843378675512185582e-18),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.62324092774919223927e-2),
+            static_cast<RealType>(8.10410923007867515072e-4),
+            static_cast<RealType>(6.70843016241177926470e-6),
+            static_cast<RealType>(2.65459014339231700938e-8),
+            static_cast<RealType>(4.45531791525831169724e-11),
+            static_cast<RealType>(2.19324401673412172456e-14),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -256) {
+        RealType t = -log2(ldexp(p, 128));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.2028e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-3.75666995985336008568e0),
+            static_cast<RealType>(-9.15751436135409108392e-2),
+            static_cast<RealType>(-8.51745858385908954959e-4),
+            static_cast<RealType>(-3.77453552696508401182e-6),
+            static_cast<RealType>(-8.10504146884381804474e-9),
+            static_cast<RealType>(-7.55871397276946580837e-12),
+            static_cast<RealType>(-2.19023097542770265117e-15),
+            static_cast<RealType>(-2.34270094396556916060e-20),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(2.30119177073875808729e-2),
+            static_cast<RealType>(2.00787377759037971795e-4),
+            static_cast<RealType>(8.27382543511838001513e-7),
+            static_cast<RealType>(1.62997898759733931959e-9),
+            static_cast<RealType>(1.36215810410261098317e-12),
+            static_cast<RealType>(3.33957268115953023683e-16),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -512) {
+        RealType t = -log2(ldexp(p, 256));
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.8900e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-4.20826069989721597050e0),
+            static_cast<RealType>(-5.07864788729928381957e-2),
+            static_cast<RealType>(-2.33825872475869133650e-4),
+            static_cast<RealType>(-5.12795917403072758309e-7),
+            static_cast<RealType>(-5.44657955194364350768e-10),
+            static_cast<RealType>(-2.51001805474510910538e-13),
+            static_cast<RealType>(-3.58448226638949307172e-17),
+            static_cast<RealType>(-1.79092368272097571876e-22),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.14671758705641048135e-2),
+            static_cast<RealType>(4.98614103841229871806e-5),
+            static_cast<RealType>(1.02397186002860292625e-7),
+            static_cast<RealType>(1.00544286633906421384e-10),
+            static_cast<RealType>(4.18843275058038084849e-14),
+            static_cast<RealType>(5.11960642868907665857e-18),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -1024) {
+        RealType t = -log2(ldexp(p, 512));
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.6777e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-4.65527239540648658214e0),
+            static_cast<RealType>(-2.78834161568280967534e-2),
+            static_cast<RealType>(-6.37014695368461940922e-5),
+            static_cast<RealType>(-6.92971221299243529202e-8),
+            static_cast<RealType>(-3.64900562915285147191e-11),
+            static_cast<RealType>(-8.32868843440595945586e-15),
+            static_cast<RealType>(-5.87602374631705229119e-19),
+            static_cast<RealType>(-1.37812578498484605190e-24),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(5.72000087046224585566e-3),
+            static_cast<RealType>(1.24068329655043560901e-5),
+            static_cast<RealType>(1.27105410419102416943e-8),
+            static_cast<RealType>(6.22649556008196699310e-12),
+            static_cast<RealType>(1.29416254332222127404e-15),
+            static_cast<RealType>(7.89365027125866583275e-20),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        result = -boost::math::numeric_limits<RealType>::infinity();
+    }
+
+    return result;
+}
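From the ilogb(p) >= -8 rung downward, the ladder above is self-similar: the branch guarded by ilogb(p) >= -2k is entered with p in [2^-2k, 2^-k), and substitutes

```latex
t = -\log_2\!\left(2^{k} p\right), \qquad
p \in \left[2^{-2k},\, 2^{-k}\right) \;\Longrightarrow\; t \in (0,\, k]
```

so each rational approximant only ever sees a bounded argument, however deep into the lower tail p lies; once p drops below 2^-1024 the chain ends and the function returns minus infinity.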
+            static_cast<RealType>(-5.44657955194364350768e-10),
+            static_cast<RealType>(-2.51001805474510910538e-13),
+            static_cast<RealType>(-3.58448226638949307172e-17),
+            static_cast<RealType>(-1.79092368272097571876e-22),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(1.14671758705641048135e-2),
+            static_cast<RealType>(4.98614103841229871806e-5),
+            static_cast<RealType>(1.02397186002860292625e-7),
+            static_cast<RealType>(1.00544286633906421384e-10),
+            static_cast<RealType>(4.18843275058038084849e-14),
+            static_cast<RealType>(5.11960642868907665857e-18),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -1024) {
+        RealType t = -log2(ldexp(p, 512));
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.6777e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-4.65527239540648658214e0),
+            static_cast<RealType>(-2.78834161568280967534e-2),
+            static_cast<RealType>(-6.37014695368461940922e-5),
+            static_cast<RealType>(-6.92971221299243529202e-8),
+            static_cast<RealType>(-3.64900562915285147191e-11),
+            static_cast<RealType>(-8.32868843440595945586e-15),
+            static_cast<RealType>(-5.87602374631705229119e-19),
+            static_cast<RealType>(-1.37812578498484605190e-24),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(5.72000087046224585566e-3),
+            static_cast<RealType>(1.24068329655043560901e-5),
+            static_cast<RealType>(1.27105410419102416943e-8),
+            static_cast<RealType>(6.22649556008196699310e-12),
+            static_cast<RealType>(1.29416254332222127404e-15),
+            static_cast<RealType>(7.89365027125866583275e-20),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        result = -boost::math::numeric_limits<RealType>::infinity();
+    }
+
+    return result;
+}
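A minimal stand-alone sketch (not part of the patch; names illustrative) of the argument transform the tail branches above share: each branch guarded by ilogb(p) >= -2k rescales p by a power of two and takes -log2, so the fitting variable t stays inside the bounded interval on which the rational minimax fit was computed.

#include <cassert>
#include <cmath>

// Sketch: the branch guarded by ilogb(p) >= -8 uses t = -log2(ldexp(p, 4)).
// For p in [2^-8, 2^-4) this maps t into (0, 4].
int main() {
    for (double p = 1.0 / 256; p < 1.0 / 16; p *= 1.25) {
        double t = -std::log2(std::ldexp(p, 4));
        assert(t > 0.0 && t <= 4.0);
    }
    return 0;
}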
+
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_lower_imp_prec(const RealType& p, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.375) {
+        RealType t = p - 0.375;
+
+        // Rational Approximation
+        // Maximum Absolute Error: 2.5723e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.74557416577759248536854968412794870581e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.04379368253541440583870397314012269006e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12622841210720956864564105821904588447e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.57744422491408570970393103737579322242e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.13509711945094517370264490591904074504e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.18322789179144512109337184576079775889e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21447613719864832622177316196592738866e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49076304733407444404640803736504398642e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.96654951892056950374719952752959986017e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.73083458872938872583408218098970368331e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.22584946471889320670122404162385347867e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98534922151507267157370682137856253991e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.09159286510191893522643172277831735606e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.86204686129323171601167115178777357431e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.43698274248278918649234376575855135232e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.75240332521434608696943994815649748669e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.31438891446345558658756610288653829009e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10716029191240549289948990305434475528e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.10878330779477313404660683539265890549e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.52360069933886703736010179403700697679e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.15864312939821257811853678185928982258e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.10341116017481903631605786613604619909e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29121822170912306719250697890270750964e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.56489746112937744052098794310386515793e1),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - 0.25;
+
+        // Rational Approximation
+        // Maximum Absolute Error: 6.1583e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.17764764050720242897742634974454113395e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.80044093802431965072543552425830082205e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23613318632011593171919848575560968064e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77438013844838858458786448973516177604e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.62569530523012138862025718052954558264e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.02005706260864894793795986187582916504e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.29383609355165614630538852833671831839e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.09367754001841471839736367284852087164e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45744413840415901080013900562654222567e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.41920296534143581978760545125050148256e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.94857580745127596732818606388347624241e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.02847586753967876900858299686189155164e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.29953583375818707785500963989580066735e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27455303165341271216882778791555788609e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.41762124591820618604790027888328605963e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.30845760165840203715852751405553821601e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.00827370048057599908445731563638383351e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.19621193929561206904250173267823637982e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.10514757798726932158537558200005910184e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.79738493761540403010052092523396617472e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.94101664430520833603032182296078344870e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.31586575577250608890806988616823861649e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.93650751613703379272667745729529916084e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.52472388998113562780767055981852228229e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.01428305018551686265238906201345171425e0),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - 0.125;
+
+        // Rational Approximation
+        // Maximum Absolute Error: 1.3135e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.77109518013577849065583862782160121458e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.05813204052660740589813216397258899528e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.19628607167020425528944673039894592264e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.67162644860799051148361885190022738759e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.05921446080443979618622123764941760355e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.26685085062411656483492973256809500654e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17117538916032273474332064444853786788e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.45059470468014721314631799845029715639e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28952226224720891553119529857430570919e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.98502296814963504284919407719496390478e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.10876326351879104392865586365509749012e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.70358021544406445036220918341411271912e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.49724346845064961378591039928633169443e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.23815021378788622035604969476085727123e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.17262073948257994617723369387261569086e4),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.94901665980514882602824575757494472790e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.54328910175180674300123471690771017388e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84847502738788846487698327848593567941e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98451502799612368808473649408471338893e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.13744760159877712051088928513298431905e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.20745061658519699732567732006176366700e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.68622317228909264645937229979147883985e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.96020751551679746882793283955926871655e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.88860541272346724142574740580038834720e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.73454107207588310809238143625482857512e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.23165643368613191971938741926948857263e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.94832163019509140191456686231012184524e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.26616234097287315007047356261933409072e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24686019847093806280148917466062407447e4),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.0498e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.77109518013577849065583862782160155093e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.77585398076895266354686007069850894777e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.47522378123968853907102309276280187353e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.63343576432650242131602396758195296288e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.77801189859227220359806456829683498508e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.93221663334563259732178473649683953515e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.95272757466323942599253855146019408376e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.73624853556509653351605530630788087166e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.41317699770351712612969089634227647374e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.34187895701093934279414993393750297714e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.64090928155753225614302094820737249510e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62401464973350962823995096121206419019e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11979822811128264831341485706314465894e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27257342406829987209876262928379300361e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.85505879705365729768944032174855501091e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40983451000610516082352700421098499905e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23459897865681009685618192649929504121e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.28925214684463186484928824536992032740e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67647262682850294124662856194944728023e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88173142080572819772032615169461689904e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.07756799117728455728056041053803769069e-11),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.7643e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.16727296241754547290632950718657117630e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.97822895738734630842909028778257589627e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.45580723831325060656664869189975355503e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.13835678647158936819843386298690513648e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.64536831064884519168892017327822018961e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.93786616484143556451247457584976578832e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.55770899078184683328915310751857391073e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.91778173446401005072425460365992356304e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.59064619930808759325013814591048817325e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.54786673836080683521554567617693797315e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.15917340396537949894051711038346411232e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.29633344043292285568750868731529586549e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.27785620133198676852587951604694784533e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.89999814745618370028655821500875451178e-16),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48772690114094395052120751771215809418e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.72281013057830222881716429522080327421e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.67186370229687087768391373818683340542e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.86988148601521223503040043124617333773e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43321724586909919175166704060749343677e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57428821868404424742036582321713763151e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17165774858274087452172407067668213010e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.78674439389954997342198692571336875222e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82045374895858670592647375231115294575e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40152058277291349447734231472872126483e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.34789603687129472952627586273206671442e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38087376350052845654180435966624948994e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34945081364333330292720602508979680233e-16),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.4987e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.78348038398799867332294266481364810762e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.42913983922316889357725662957488617770e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.02077376277824482097703213549730657663e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.01799479940825547859103232846394236067e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.31954083060883245879038709103320778401e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.14437110578260816704498035546280169833e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.75434713435598124790021625988306358726e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.70722283097111675839403787383067403199e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.22792548204908895458622068271940298849e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.23652632092726261134927067083229843867e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.26848751206698811476021875382152874517e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.96683933776920842966962054618493551480e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.65547426464916480144982028081303670013e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.01788104318587272115031165074724363239e-19),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.43507070588695242714872431565299762416e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.42808541175677232789532731946043918868e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.58154336417481327293949514291626832622e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.52883128062761272825364005132296437324e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46220303655089035098911370014929809787e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.44776253795594076489612438705019750179e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.09607872267766585503592561222987444825e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24270418154050297788150584301311027023e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.52138350835458198482199500102799185922e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.28330565098807415367837423320898722351e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61220858078610415609826514581165467762e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31680570822471881148008283775281806658e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61638868324981393463928986484698110415e-20),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.4643e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.32474749499506228416012679106564727824e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.11125026189437033131539969177846635890e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.56906722402983201196890012041528422765e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.56242546565817333757522889497509484980e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.96189353402888611791301502740835972176e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.25518459970705638772495930203869523701e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.23831474024265607073689937590604367113e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.44925744847701733694636991148083680863e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.22891322042392818013643347840386719351e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.72860750698838897533843164259437533533e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.38276123679972197567738586890856461530e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.75010927807240165715236750369730131837e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.39435252454410259267870094713230289131e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.32672767938414655620839066142834241506e-23),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.71913035066927544877255131988977106466e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.07499674325721771035402891723823952963e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.73304002376509252638426379643927595435e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.45986195188119302051678426047947808068e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39631771214004792103186529415117786213e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.82972151546053891838685817022915476363e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.11484161875982352879422494936862579004e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.76886416872139526041488219568768973343e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.88160764330501845206576873052377420740e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20653899535657202009579871085255085820e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.86752135706343102514753706859178940399e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.08670633989984379551412930443791478495e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96869107941293302786688580824755244599e-24),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -64) {
+        RealType t = -log2(ldexp(p, 32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.2783e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.82318656228158372073367735499501003484e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.46261040951642110189344545942990712460e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.64741560190892266676648641695426188913e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.28753551974093682831398870653055328683e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.21312013770915263838500863217194379134e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.52436958859473873340733176333088176566e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.19550238139736009251193868269757013675e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.14964971787780037500173882363122301527e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.40304301938210548254468386306034204388e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.86982233973109416660769999752508002999e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.47229710624085810190563630948355644978e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.97511060097659395674010001155696382091e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.14321784268659603072523892366718901165e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.69804409248161357472540739283978368871e-27),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85763741109198600677877934140774914793e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51555423561034635648725665049090572375e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.14334282485948451530639961260946534734e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15303265564789411158928907568898290494e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.33945229806307308687045028827126348382e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.94373901322371782367428404051188999662e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.51420073260465851038482922686870398511e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.39366317896256472225488167609473929757e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32986474655329330922243678847674164814e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.09408217872473269288530036223761068322e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79051953285476930547217173280519421410e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94530899348454778842122895096072361105e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36452993460830805591166007621343447892e-28),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -128) {
+        RealType t = -log2(ldexp(p, 64));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.0123e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.29700011190686230364493911161520668302e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.16175031776740080906111179721128106011e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.33343982195432985864570319341790342784e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.25414682801788504282484273374052405406e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.08812659343240279665150323243172015853e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.33251452861660571881208437468957953698e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.80894766863868081020089830941243893253e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.84955155823472122347227298177346716657e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.98637322645260158088125181176106901234e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.24174383760514163336627039277792172744e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.54369979866464292009398761404242103210e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.02572051048819721089874338860693952304e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.93656169061287808919601714139458074543e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.14930159772574816086864316805656403181e-31),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27154900915819978649344191118112870943e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77527908332591966425460814882436207182e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88097249712649070373643439940164263005e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33593973311650359460519742789132084170e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34383186845963127931313004467487408932e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.18631088001587612168708294926967112654e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.25338215226314856456799568077385137286e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30644713290591280849926388043887647219e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55508263112797212356530850090635211577e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.96694528841324480583957017533192805939e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81411886190142822899424539396403206677e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.64683991040772975824276994623053932566e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81924597500766743545654858597960153152e-32),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -256) {
+        RealType t = -log2(ldexp(p, 128));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.7624e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.75666995985336007747791649448887723610e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.78960399079208663111712385988217075907e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.74942252057371678208959612011771010491e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.54567765510065203543937772001248399869e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.53093894540157655856029322335609764674e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.83833601054721321664219768559444646069e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.52281007055180941965172296953524749452e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.57322728543196345563534040700366511864e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.52881564741260266060082523971278782893e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.60440334652864372786302383583725866608e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.60691285483339296337794569661545125426e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.29567560587442907936295101146377006338e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.50976593324256906782731237116487284834e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.71308835356954147218854223581309967814e-35),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.62732785401286024270119905692156750540e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.40382961238668912455720345718267045656e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10406445824749289380797744206585266357e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.28896702052362503156922190248503561966e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.16141168910009886089186579048301366151e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41978644147717141591105056152782456952e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.28101353275172857831967521183323237520e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.02996252940600644617348281599332256544e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.91006255647885778937252519693385130907e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84585864559619959844425689120130028450e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.68573963627097356380969264657086640713e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11059307697054035905630311480256015939e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36363494270701950295678466437393953964e-36),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -512) {
+        RealType t = -log2(ldexp(p, 256));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.5621e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.20826069989721596260510558511263035942e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.97440158261228371765435988840257904642e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.03971528248920108158059927256206438162e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.27123766722395421727031536104546382045e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.42341191105097202061646583288627536471e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.27644514375284202188806395834379509517e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.10772944192965679212172315655880689287e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.32875098791800400229370712119075696952e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.06204614360238210805757647764525929969e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.43745006810807466452260414216858795476e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.66712970893511330059273629445122037896e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.72840198778128683137250377883245540424e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.91906782399731224228792112460580813901e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.42769091263044979075875010403899574987e-39),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31008507886426704374911618340654350029e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34370110384866123378972324145883460422e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37370811166006065198348108499624387519e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.14880753458828334658200185014547794333e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29049942195929206183214601044522500821e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19814793427532184357255406261941946071e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53609759199568827596069048758012402352e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94113467521833827559558236675876398395e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.46066673213431758610437384053309779874e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73781952388557106045597803110890418919e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.17225106466605017267996611448679124342e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66384334839761400228111118435077786644e-35),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.02877806111195383689496741738320318348e-40),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -1024) {
+        RealType t = -log2(ldexp(p, 512));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.4128e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.65527239540648657446629479052874029563e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.49609214609793557370425343404734771058e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.85355312961840000203681352424632999367e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.57243631623079865238801420669247289633e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.64978384343879316016184643597712973486e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.12776782513387823319217102727637716531e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.86985041780323969283076332449881856202e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.11665149267826038417038582618446201377e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.44259259232002496618805591961855219612e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.13186466395317710362065595347401054176e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.72627240737786709568584848420972570566e-29),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.40450635670659803069555960816203368299e-33),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.90550919589933206991152832258558972394e-38),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.33785768143117121220383154455316199086e-43),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15365252334339030944695314405853064901e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84519641047962864523571386561993045416e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71097850431873211384168229175171958023e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20268329795802836663630276028274915013e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.00891848515558877833795613956071967566e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41031229424613259381704686657785733606e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96506914235910190020798805190634423572e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.51190756655665636680121123277286815188e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82855686000721415124702578998188630945e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64298533757673219241102013167519737553e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01176104624443909516274664414542493718e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.41571947895162847564926590304679876888e-39),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.84682590163505511580949151048092123923e-44),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -2048) {
+        RealType t = -log2(ldexp(p, 1024));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.3064e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.09971143249822249471944441552701756051e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.00154235169065403254826962372636417554e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.76859552294270710004718457715250134998e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.16331901379268792872208226779641113312e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.11590258438815173520561213981966313758e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.17278804462968109983985217400233347654e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.14112976645884560534267524918610371127e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.34652102658577790471066054415469309178e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.85242987373551062800089607781071064493e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.35051904844102317261572436130886083833e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.78478298776769981726834169566536801689e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.22532973433435489030532261530565473605e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.25433604872532935232490414753194993235e-41),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.49792182967344082832448065912949074241e-47),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.76316274347013095030195725596822418859e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45872499993438633169552184478587544165e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13309566903496793786045158442686362533e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99468690853840997883815075627545315449e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24734617022827960185483615293575601906e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.30099852343633243897084627428924039959e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52598626985708878790452436052924637029e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91432956461466900007096548587800675801e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.54421383015859327468201269268335476713e-29),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55939743284103455997584863292829252782e-33),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.73331214275752923691778067125447148395e-38),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55089353084326800338273098565932598679e-42),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.85408276119483460035366338145310798737e-48),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4096) {
+        RealType t = -log2(ldexp(p, 2048));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.2337e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.54271778755494231572464179212263718102e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.62737121212473668543011440432166267791e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.10099492629239750693134803100262740506e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.56925359477960645026399648793960646858e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.50756287005636861300081510456668184335e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.40657453971177017986596834420774251809e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.25518001919157628924245515302669097090e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.79618511101781942757791021761865762100e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.70242241511341924787722778791482800736e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.87078860748428154402226644449936091766e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.84256560347986567120140826597805016470e-35),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.71419536977123330712095123316879755172e-40),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.20746149769511232987820552765701234564e-45),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.97544543671003989410397788518265345930e-51),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87980995989632171985079518382705421728e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.64234691529227024725728122489224211774e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66149363392892604040036997518509803848e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24365427902918684575287447585802611012e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.88619663977804926166359181945671853793e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.25299846770395565237726328268659386749e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18720305257346902130922082357712771134e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13302092710568005396855019882472656722e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.88571994886818976015465466797965950164e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.48486836614948668196092864992423643733e-36),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72247229252387482782783442901266890088e-41),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76046592638280288324495546006105696670e-46),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.09378739037162732758860377477607829024e-52),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -8192) {
+        RealType t = -log2(ldexp(p, 4096));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.1864e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.98493298246627952401490656857159302716e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.76990949843357898517869703626917264559e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.65042794324685841303461715489845834903e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.10605446678026983843303253925148000808e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.02762962429283889606329831562937730874e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.04105882385534634676234513866095562877e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.76001901462155366759952792570076976049e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.48255362964603267691139956218580946011e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.19422119466925125740484046268759113569e-29),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.00719439924828639148906078835399693640e-33),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.89921842231783558951433534621837291030e-38),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.90738353848476619269054038082927243972e-43),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.19893980415902021846066305054394089887e-49),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.85434486590981105149494168639321627061e-55),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43932168599456260558411716919165161381e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.09851997458503734167541584552305867433e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.32285843258966417340522520711168738158e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.76041266755635729156773747720864677283e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21202253509959946958614664659473305613e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28647335562574024550800155417747339700e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.24953333571478743858014647649207040423e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.41206199962423704137133875822618501173e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34018702380092542910629787632780530080e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41732943566503750356718429150708698018e-39),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29626943299239081309470153019011607254e-44),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.13947437500822384369637881437951570653e-50),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.61766557173110449434575883392084129710e-56),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -16384) {
+        RealType t = -log2(ldexp(p, 8192));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.1568e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.42671464308364892089984144203590292562e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.70165333325375920690660683988390032004e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.51230594210711745541592189387307516997e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.81387249912672866168782835177116953008e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.35308063526816559199325906123032162155e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.67672500455361049516022171111707553191e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.45533142942305626136621399056034449775e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.37422341389432268402917477004312957781e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.32123176403616347106899307416474970831e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.07816837508332884935917946618577512264e-36),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.95418937882563343895280651308376855123e-41),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.78256110112636303941842779721479313701e-47),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.09423327107440352766843873264503717048e-52),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.12386630925835960782702757402676887380e-58),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.19477412398085422408065302795208098500e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27347517804649548179786994390985841531e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15042399607786347684366638940822746311e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.84537882580411074097888848210083177973e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.78283331111405789359863743531858801963e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00711555012725961640684514298170252743e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.21370151454170604715234671414141850094e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72008007024350635082914256163415892454e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.61182095564217124712889821368695320635e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35498047010165964231841033788823033461e-42),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11828030216193307885831734256233140264e-47),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22499308298315468568520585583666049073e-53),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04877018522402283597555167651619229959e-59),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        result = -boost::math::numeric_limits<RealType>::infinity();
+    }
+
+    return result;
+}
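The two overloads above are selected by a precision tag; a self-contained sketch of that integral_constant-style dispatch (stand-in names, not the patch's actual wrapper):

#include <limits>

// Hypothetical stand-ins for the 53- and 113-bit overloads in this patch.
template <int N> struct prec_tag {};

template <typename RealType>
RealType quantile_lower_imp(const RealType& p, const prec_tag<53>&)  { return p; }
template <typename RealType>
RealType quantile_lower_imp(const RealType& p, const prec_tag<113>&) { return p; }

// Dispatch on significand digits, mirroring how the
// boost::math::integral_constant<int, 53> / <int, 113> overloads are chosen.
template <typename RealType>
RealType quantile_lower(const RealType& p) {
    typedef prec_tag<std::numeric_limits<RealType>::digits <= 53 ? 53 : 113> tag_type;
    return quantile_lower_imp(p, tag_type());
}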
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.375) {
+        RealType t = p - static_cast<RealType>(0.375);
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.1286e-20
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(1.31348919222343858178e0),
+            static_cast<RealType>(-1.06646675961352786791e0),
+            static_cast<RealType>(-1.80946160022120488884e1),
+            static_cast<RealType>(-1.53457017598330440033e0),
+            static_cast<RealType>(4.71260102173048370028e1),
+            static_cast<RealType>(4.61048467818771410732e0),
+            static_cast<RealType>(-2.80957284947853532418e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.71007453129016317772e0),
+            static_cast<RealType>(1.31946404969596908872e0),
+            static_cast<RealType>(-1.70321827414586880227e1),
+            static_cast<RealType>(-1.11253495615474018666e1),
+            static_cast<RealType>(1.62659086449959446986e1),
+            static_cast<RealType>(7.37109203295032098763e0),
+            static_cast<RealType>(-2.43898047338699777337e0),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - static_cast<RealType>(0.25);
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.4934e-18
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(2.55081568282045924981e0),
+            static_cast<RealType>(5.38750533719526696218e0),
+            static_cast<RealType>(-2.32797421725187349036e1),
+            static_cast<RealType>(-3.96043566411306749784e1),
+            static_cast<RealType>(3.80609941977115436545e1),
+            static_cast<RealType>(3.35014421131920266346e1),
+            static_cast<RealType>(-1.17490458743273503838e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(7.52439409918350484765e0),
+            static_cast<RealType>(1.34784954182866689668e1),
+            static_cast<RealType>(-9.21002543625052363446e0),
+            static_cast<RealType>(-2.67378141317474265949e1),
+            static_cast<RealType>(2.10158795079902783094e0),
+            static_cast<RealType>(5.90098096212203282798e0),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - static_cast<RealType>(0.125);
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.0795e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(5.68160868054034111703e0),
+            static_cast<RealType>(1.06098927525586705381e2),
+            static_cast<RealType>(5.74509518025029027944e2),
+            static_cast<RealType>(4.91117375866809056969e2),
+            static_cast<RealType>(-2.92607000654635606895e3),
+            static_cast<RealType>(-3.82912009541683403499e3),
+            static_cast<RealType>(2.49195208452006100935e3),
+            static_cast<RealType>(1.29413301335116683836e3),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(2.69603865809599480308e1),
+            static_cast<RealType>(2.63378422475372461819e2),
+            static_cast<RealType>(1.09903493506098212946e3),
+            static_cast<RealType>(1.60315072092792425370e3),
+            static_cast<RealType>(-5.44710468198458322870e2),
+            static_cast<RealType>(-1.76410218726878681387e3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.4618e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(7.10201085067542566037e-1),
+            static_cast<RealType>(6.70042401812679849451e-1),
+            static_cast<RealType>(2.42799404088685074098e-1),
+            static_cast<RealType>(4.80613880364042262227e-2),
+            static_cast<RealType>(6.04473313360581797461e-3),
+            static_cast<RealType>(5.09172911021654842046e-4),
+            static_cast<RealType>(-6.63145317984529265677e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(9.18649629646213969612e-1),
+            static_cast<RealType>(3.66343989541898286306e-1),
+            static_cast<RealType>(8.01010534748206001446e-2),
+            static_cast<RealType>(1.00553335007168823115e-2),
+            static_cast<RealType>(6.30966763237332075752e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p);
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.8994e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(7.06147398566773538296e-1),
+            static_cast<RealType>(4.26802162741800814387e-1),
+            static_cast<RealType>(1.32254436707168800420e-1),
+            static_cast<RealType>(2.86055054496737936396e-2),
+            static_cast<RealType>(3.63373131686703931514e-3),
+            static_cast<RealType>(3.84438945816411937013e-4),
+            static_cast<RealType>(1.67768561420296743529e-5),
+            static_cast<RealType>(8.76982374043363061978e-7),
+            static_cast<RealType>(-1.99744396595921347207e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(6.28190787856605587324e-1),
+            static_cast<RealType>(2.10992746593815791546e-1),
+            static_cast<RealType>(4.44397672327578790713e-2),
+            static_cast<RealType>(6.02768341661155914525e-3),
+            static_cast<RealType>(5.46578619531721658923e-4),
+            static_cast<RealType>(3.11116573895074296750e-5),
+            static_cast<RealType>(1.17729007979018602786e-6),
+            static_cast<RealType>(-2.78441865351376040812e-8),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p);
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.8685e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(6.48209596014908359251e-1),
+            static_cast<RealType>(2.52611824671691390768e-1),
+            static_cast<RealType>(4.65114070477803399291e-2),
+            static_cast<RealType>(5.23373513313686849909e-3),
+            static_cast<RealType>(3.83113384161076881958e-4),
+            static_cast<RealType>(1.96230077517629530809e-5),
+            static_cast<RealType>(5.83117485120890819338e-7),
+            static_cast<RealType>(6.92614450423703079737e-9),
+            static_cast<RealType>(-3.89531123166658723619e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(3.99413988076189200840e-1),
+            static_cast<RealType>(7.32068638518417765776e-2),
+            static_cast<RealType>(8.15517102642752348889e-3),
+            static_cast<RealType>(6.09126071418098074914e-4),
+            static_cast<RealType>(3.03794079468789962611e-5),
+            static_cast<RealType>(9.32109079205017197662e-7),
+            static_cast<RealType>(1.05435710482490499583e-8),
+            static_cast<RealType>(-6.08748435983193979360e-10),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p);
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.0253e-17
+        BOOST_MATH_STATIC const RealType P[10] = {
+            static_cast<RealType>(6.36719010559816164896e-1),
+            static_cast<RealType>(2.06504115804034148753e-1),
+            static_cast<RealType>(3.28085429275407182582e-2),
+            static_cast<RealType>(3.31676417519020335859e-3),
+            static_cast<RealType>(2.35502578757551086372e-4),
+            static_cast<RealType>(1.21652240566662139418e-5),
+            static_cast<RealType>(4.57039495420392748658e-7),
+            static_cast<RealType>(1.18090959236399583940e-8),
+            static_cast<RealType>(1.77492646969597480221e-10),
+            static_cast<RealType>(-2.19331267300885448673e-17),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(3.24422807416528490276e-1),
+            static_cast<RealType>(5.15290129833049138552e-2),
+            static_cast<RealType>(5.21051235888272287209e-3),
+            static_cast<RealType>(3.69895399249472399625e-4),
+            static_cast<RealType>(1.91103139437893226482e-5),
+            static_cast<RealType>(7.17882574725373091636e-7),
+            static_cast<RealType>(1.85502934977316481559e-8),
+            static_cast<RealType>(2.78798057565507249164e-10),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p);
+    }
+    else if (ilogb(p) >= -64) {
+        RealType t = -log2(ldexp(p, 32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.1705e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(6.36619775525705206992e-1),
+            static_cast<RealType>(2.68335698140634792041e-1),
+            static_cast<RealType>(5.49803347535070103650e-2),
+            static_cast<RealType>(7.25018344556356907109e-3),
+            static_cast<RealType>(6.87753481255849254220e-4),
+            static_cast<RealType>(4.86155006277788340253e-5),
+            static_cast<RealType>(2.84604768310787862450e-6),
+            static_cast<RealType>(9.56133960810049319917e-8),
+            static_cast<RealType>(5.26850116571886385248e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1),
+            static_cast<RealType>(4.21500730173440590900e-1),
+            static_cast<RealType>(8.63629077498258325752e-2),
+            static_cast<RealType>(1.13885615328098640032e-2),
+            static_cast<RealType>(1.08032064178130906887e-3),
+            static_cast<RealType>(7.63650498196064792408e-5),
+            static_cast<RealType>(4.47056124637379045275e-6),
+            static_cast<RealType>(1.50189171357721423127e-7),
+            static_cast<RealType>(8.27574227882033707932e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p);
+    }
+    else {
+        result = 2 / (constants::pi<RealType>() * p);
+    }
+
+    return result;
+}
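Each branch above is a rational minimax fit P(t)/Q(t) evaluated by tools::evaluate_polynomial, which takes its coefficients in ascending order; a minimal stand-alone equivalent for readers without Boost at hand (illustrative only, not the library's implementation):

#include <cstddef>

// Horner evaluation with ascending-order coefficients, matching the
// convention of boost::math::tools::evaluate_polynomial used above.
template <typename Real, std::size_t N>
Real horner(const Real (&c)[N], Real t) {
    Real r = c[N - 1];
    for (std::size_t i = N - 1; i > 0; --i)
        r = r * t + c[i - 1];
    return r;
}

// A branch's result is then horner(P, t) / horner(Q, t); the far-tail
// branches divide once more by p, consistent with the closing
// 2 / (pi * p) asymptote.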
+
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.4375) {
+        RealType t = p - 0.4375;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.4465e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.08338732735341567163440035550389989556e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.27245731792290848390848202647311435023e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.29317169036386848462079766136373749420e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36342136825575317326816540539659955416e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.31108700679715257074164180252148868348e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.81863611749256385875333154189074054367e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.11618233433781722149749739225688743102e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45241854625686954669050322459035410227e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.09780430233523239228350030812868983054e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.42232005306623465126477816911649683789e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.24816048952817367950452675590290535540e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[10] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80464069267458650284548842830642770344e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.28240205449280944407125436342013240876e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.94145088402407692372903806765594642452e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30062294376971843436236253827463203953e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47118047660686070998671803800237836970e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.00643263133479482753298910520340235765e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.79460803824650509439313928266686172255e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32647058691746306769699006355256099134e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.59208938705683333141038012302171324544e0),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.375) {
+        RealType t = p - 0.375;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.1929e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31348919222343858173602105619413801018e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.02800226274700443079521563669609776285e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.02091675505570786434803291987263553778e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50141943970885120432710080552941486001e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.93099903417013423125762526465625227789e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.56412922160141953385088141936082249641e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.47026602535072645589119440784669747242e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.01068960815396205074336853052832780888e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.86591619131639705495877493344047777421e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.26390836417639942474165178280649450755e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.18212484486162942333407102351878915285e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[10] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97802777458574322604171035748634755981e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.33277809211107726455308655998819166901e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.76555481647551088626503871996617234475e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.33146828123660043197526014404644087069e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.65159900182434446550785415837526228592e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.32391192521438191878041140980983374411e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.12112886240590711980064990996002999330e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.93964809733838306198746831833843897743e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53948309965401603055162465663290204205e1),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - 0.25;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.2765e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55081568282045925871949387822806890848e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21080883686702131458668798583937913025e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15083151599213113740932148510289036342e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.94190629930345397070104862391009053509e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.40768205403470729468297576291723141480e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00001008242667338579153437084294876585e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70900785394455368299616221471466320407e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48947677419760753410122194475234527150e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.01826174001050912355357867446431955195e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.55833657916143927452986099130671173511e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.32953617526068647169047596631564287934e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.32825234826729794599233825734928884074e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.47352171888649528242284500266830013906e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40793887011403443604922082103267036101e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.04348824299115035210088417095305744248e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19680004238557953382868629429538716069e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.31172263627566980203163658640597441741e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.07390429662527773449936608284938592773e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.94877589960261706923147291496752293313e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.94903802003585398809229608695623474341e2),
4.80417437710146805538675929521229778181e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.23364098614130091185959973343748897970e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.12975537807357019330268041620753617442e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.36592279898578127130605391750428961301e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18495624730372864715421146607185990918e1), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (p >= 0.125) { + RealType t = p - 0.125; + + // Rational Approximation + // Maximum Relative Error: 1.8007e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.68160868054034088524891526884683014057e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85165791469635551063850795991424359350e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.42938802867742165917839659578485422534e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59273512668331194186228996665355137458e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.91680503091725091370507732042764517726e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85642348415580865994863513727308578556e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.90181935466760294413877600892013910183e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.89141276256233344773677083034724024215e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00250514074918631367419468760920281159e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28168216451109123143492880695546179794e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14996399533648172721538646235459709807e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.58122093722347315498230864294015130011e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.25168985723506298009849577846542992545e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01179759985059408785527092464505889999e5), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08766677593618443545489115711858395831e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.05163374816838964338807027995515659842e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.62582103160439981904537982068579322820e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.62170991799612186300694554812291085206e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11013837158432827711075385018851760313e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45458895395245243570930804678601511371e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.08336489932795411216528182314354971403e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.11314692423102333551299419575616734987e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.43287683964711678082430107025218057096e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.62052814931825182298493472041247278475e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91440920656902450957296030252809476245e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.54913345383745613446952578605023052270e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.76034827722473399290702590414091767416e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94027684838690965214346010602354223752e3), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(p) >= -4) { + RealType t = -log2(ldexp(p, 3)); + + // Rational Approximation + // Maximum Relative Error: 6.1905e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
7.10201085067542610656114408605853786551e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.04725580445598482170291458376577106746e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.35945839005443673797792325217359695272e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15894004364989372373490772246381545906e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.54550169514753150042231386414687368032e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50389998399729913427837945242228928632e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75018554725308784191307050896936055909e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95901705695908219804887362154169268380e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34386856794684798098717884587473860604e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.89025399683852111061217430321882178699e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19044156703773954109232310846984749672e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11932910013840927659486142481532276176e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64064398716881126082770692219937093427e-10), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24909572944428286558287313527068259394e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.70912720447370835699164559729287157119e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.21998644852982625437008410769048682388e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.95906385698373052547496572397097325447e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35344144061390771459100718852878517200e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34168669072527413734185948498168454149e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.24488907049996230177518311480230131257e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.92059624838630990024209986717533470508e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84464614954263838504154559314144088371e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.67874815200287308180777775077428545024e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65919857481420519138294080418011981524e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.31466713452016682217190521435479677133e-10), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p); + } + else if (ilogb(p) >= -8) { + RealType t = -log2(ldexp(p, 4)); + + // Rational Approximation + // Maximum Relative Error: 8.5157e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.06147398566773479301585022897491054494e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06137881154706023038556659418303323027e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.00274868819366386235164897614448662308e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.03481313941011533876096564688041226638e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50172569438851062169493372974287427240e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.33370725278950299189434839636002761850e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.97566905908106543054773229070602272718e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85701973515993932384374087677862623215e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81956143385351702288398705969037130205e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.49975572102999645354655667945479202048e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.54665400959860442558683245665801873530e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
2.94292402413454232307556797758030774716e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98038791388715925556623187510676330309e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11242951548709169234296005470944661995e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.92636379295018831848234711132457626676e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77389296072621088586880199705598178518e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.57808410784300002747916947756919004207e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.93860773322862111592582321183379587624e-16), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52683694883265337797012770275040297516e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17837082293165509684677505408307814500e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.06195236296471366891670923430225774487e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29459155224640682509948954218044556307e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.71350726081102446771887145938865551618e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55986063168260695680927535587363081713e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91996892322204645930710038043021675160e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.43907073162091303683795779882887569537e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.50034830055055263363497137448887884379e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.56615898355501904078935686679056442496e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.61099855362387625880067378834775577974e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.12940315230564635808566630258463831421e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73572881409271303264226007333510301220e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.77786420070246087920941454352749186288e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77914406265766625938477137082940482898e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.19708585422668069396821478975324123588e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40406059898292960948942525697075698413e-15), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p); + } + else if (ilogb(p) >= -16) { + RealType t = -log2(ldexp(p, 8)); + + // Rational Approximation + // Maximum Relative Error: 7.6812e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.48209596014908270566135466727658374314e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02026332003132864886056710532156370366e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68941634461905013212266453851941196774e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.61650792370551069313309111250434438540e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52611930219013953260661961529732777539e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29488123972430683478601278003510200360e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68806175827491046693183596144172426378e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51806782259569842628995584152985951836e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92353868262961486571527005289554589652e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21494769586031703137329731447673056499e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.39837421784601055804920937629607771973e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82216155524308827738242486229625170158e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.04275785296896148301798836366902456306e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19999929939765873468528448012634122362e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.03583326787146398902262502660879425573e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59755092249701477917281379650537907903e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32583227076029470589713734885690555562e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.18237323554153660947807202150429686004e-20), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.38459552164692902984228821988876295376e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21584899508575302641780901222203752951e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19656836695824518143414401720590693544e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39821085943818944882332778361549212756e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60484296824768079700823824408428524933e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.22173385695010329771921985088956556771e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95541259523416810836752584764202086573e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02184255281138028802991551275755427743e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90825805251143907045903671893185297007e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00501277755608081163250456177637280682e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.43387099521800224735155351696799358451e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63751106922299101655071906417624415019e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.02796982349519589339629488980132546290e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26197249278457937947269910907701176956e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50981935956236238709523457678017928506e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.65309560070040982176772709693008187384e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42817828965851841104270899392956866435e-20), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p); + } + else if (ilogb(p) >= -32) { + RealType t = -log2(ldexp(p, 16)); + + // Rational Approximation + // Maximum Relative Error: 2.8388e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36719010559816175149447242695581604280e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14714772485724956396126176973339095223e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.47792450677638612907408723539943311437e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14084804538576805298420530820092167411e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25784891219227004394312050838763762669e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06837168825575413225975778906503529455e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.26908306638706189702624634771158355088e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06396335535135452379658152785541731746e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89854018431899039966628599727721422261e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.48974049316978526855972339306215972434e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50886538662952684349385729585856778829e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.14095970401472469264258565259303801322e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71915162586912203234023473966563445362e-14), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.46099196574734038609354417874908346873e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.69944075002490023348175340827135133316e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.37340205792165863440617831987825515203e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.87812199530402923085142356622707924805e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76810877067601573471489978907720495511e-24), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.94373217074550329856398644558576545146e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17462725343185049507839058445338783693e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79202779096887355136298419604918306868e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97583473532621831662838256679872014292e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67819154370257505016693473230060726722e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14182379349642191946237975301363902175e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.38367234191828732305257162934647076311e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98221340505887984555143894024281550376e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17648765147609962405833802498013198305e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.94091261341172666220769613477202626517e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12169979717068598708585414568018667622e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70043694579268983742161305612636042906e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.43651270200498902307944806310116446583e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.95266223996097470768947426604723764300e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15821111112681530432702452073811996961e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08041298058041360645934320138765284054e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.91893114159827950553463154758337724676e-24), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p); + } + else if (ilogb(p) >= -64) { + RealType t = -log2(ldexp(p, 32)); + + // Rational Approximation + // Maximum Relative Error: 1.8746e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[19] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619775525705288697351261475419832625e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29882145587771350744255724773409752285e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.07952726597277085327360888304737411175e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72928496414816922167597110591366081416e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.01641163277458693633771532254570177776e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65627339211110756774878685166318417370e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41953343652571732907631074381749818724e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.27682202874503433884090203197149318368e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33177176779158737868498722222027162030e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.34485618544363735547395633416797591537e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96996761199233617188435782568975757378e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.49631247632674130553464740647053162499e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.68090516971007163491968659797593218680e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.39910262557283449853923535586722968539e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83704888007521886644896435914745476741e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87884425419276681417666064027484555860e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36600092466902449189685791563990733005e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.53604301472332155307661986064796109517e-26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.65664588229982894587678197374867153136e-40), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61098031370834273919229478584740981117e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.40810642301361416278392589243623940154e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.42874346984605660407576451987840217534e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.30896462654364903689199648803900475405e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.17246449391141576955714059812811712587e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22979790521806964047777145482613709395e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.85960899519336488582042102184331670230e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66273852553220863665584472398487539899e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15372731217983084923067673501176233172e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.09441788794783860366430915309857085224e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.06279112323261126652767146380404236150e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36359339522621405197747209968637035618e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19770526521305519813109395521868217810e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.88562963284557433336083678206625018948e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95128165317597657325539450957778690578e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14570923483883184645242764315877865073e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.69599603258626408321886443187629340033e-26), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p); + } + else if (ilogb(p) >= -128) { + RealType t = -log2(ldexp(p, 64)); + + // Rational Approximation + // Maximum Relative Error: 3.9915e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619772367581344576326594951209529606e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72456363182667891167613558295097711432e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74486567435450138741058930951301644059e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.94624522781897679952110594449134468564e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09848623985771449914778668831103210333e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.47493285141689711937343304940229517457e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.22134975575390048261922652492143139174e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30240148387764167235466713023950979069e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06917824188001432265980161955665997666e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27834220508404489112697949450988070802e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48663447630051388468872352628795428134e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.50514504588736921389704370029090421684e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18965303814265217659151418619980209487e-14), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74200654214326267651127117044008493519e-16),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.34060054352573532839373386456991657111e-18),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.34240516843783954067548886404044879120e-20),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07803703545135964499326712080667886449e-21),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61500479431085205124031101160332446432e-23),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27973454499231032893774072677004977154e-1),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.02401389920613749641292661572240166038e-2),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24819328156695252221821935845914708591e-2),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27210724381675120281861717194783977895e-3),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01708007392492681238863778030115281961e-4),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.63088069045476088736355784718397594807e-6),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61660379368211892821215228891806384883e-7),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67946125503415200067055797463173521598e-8),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72040422051759599096448422858046040086e-10),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33519997465950200122152159780364149268e-11),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07666528975810553712124845533861745455e-13),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86870262247486708096341722190198527508e-14),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30713380444621290817686989936029997572e-16),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.38899571664905345700275460272815357978e-18),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46750157220118157937510816924752429685e-19),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69337661543585547694652989893297703060e-21),
+        BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53684359865963395505791671817598669527e-23),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * p);
+    }
+    else {
+        result = 2 / (constants::pi<RealType>() * p);
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 53>& tag)
+{
+    if (p > 0.5)
+    {
+        return !complement ? landau_quantile_upper_imp_prec(1 - p, tag) : landau_quantile_lower_imp_prec(1 - p, tag);
+    }
+
+    return complement ? landau_quantile_upper_imp_prec(p, tag) : landau_quantile_lower_imp_prec(p, tag);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 113>& tag)
+{
+    if (p > 0.5)
+    {
+        return !complement ? landau_quantile_upper_imp_prec(1 - p, tag) : landau_quantile_lower_imp_prec(1 - p, tag);
+    }
+
+    return complement ? landau_quantile_upper_imp_prec(p, tag) : landau_quantile_lower_imp_prec(p, tag);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType landau_quantile_imp(const landau_distribution<RealType, Policy>& dist, const RealType& p, bool complement)
+{
+    // This routine implements the quantile for the Landau distribution,
+    // the value p may be the probability, or its complement if complement=true.
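+    //
+    // (Editorial note) The precision of RealType selects either the 53-bit or
+    // the 113-bit rational approximation above via the integral_constant tag
+    // computed below, and probabilities with p > 0.5 are routed through the
+    // opposite tail, so the approximations are only ever evaluated on [0, 0.5].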
+
+    constexpr auto function = "boost::math::quantile(landau<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+    RealType bias = dist.bias();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_probability(function, p, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Landau distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * (landau_quantile_imp_prec(p, complement, tag_type()) - bias);
+
+    return result;
+}
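+
+// A minimal usage sketch (editorial note, not part of the library): the
+// dispatcher above is what the public quantile() overloads further down in
+// this header ultimately call, e.g.
+//
+//     boost::math::landau dist;                                               // location 0, scale 1
+//     double q  = boost::math::quantile(dist, 0.75);                          // lower-tail quantile
+//     double qc = boost::math::quantile(boost::math::complement(dist, 0.25)); // same point via the upper tail
+//
+// q and qc agree up to rounding, since the complemented form evaluates the
+// upper tail directly instead of computing 1 - p.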
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_mode_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(-0.42931452986133525017);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_mode_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, -0.42931452986133525016556463510885028346);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType landau_mode_imp(const landau_distribution<RealType, Policy>& dist)
+{
+    // This implements the mode for the Landau distribution.
+
+    constexpr auto function = "boost::math::mode(landau<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+    RealType bias = dist.bias();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Landau distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * (landau_mode_imp_prec<RealType>(tag_type()) - bias);
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_median_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(0.57563014394507821440);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_median_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, 0.57563014394507821439627930892257517269);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType landau_median_imp(const landau_distribution<RealType, Policy>& dist)
+{
+    // This implements the median for the Landau distribution.
+
+    constexpr auto function = "boost::math::median(landau<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+    RealType bias = dist.bias();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Landau distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * (landau_median_imp_prec<RealType>(tag_type()) - bias);
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_entropy_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(2.37263644000448182448);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType landau_entropy_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.3726364400044818244844049010588577710);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType landau_entropy_imp(const landau_distribution<RealType, Policy>& dist)
+{
+    // This implements the entropy for the Landau distribution.
+
+    constexpr auto function = "boost::math::entropy(landau<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Landau distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = landau_entropy_imp_prec<RealType>(tag_type()) + log(scale);
+
+    return result;
+}
+
+} // detail
+
+template <class RealType = double, class Policy = policies::policy<> >
+class landau_distribution
+{
+    public:
+    typedef RealType value_type;
+    typedef Policy policy_type;
+
+    BOOST_MATH_GPU_ENABLED landau_distribution(RealType l_location = 0, RealType l_scale = 1)
+        : mu(l_location), c(l_scale)
+    {
+        BOOST_MATH_STD_USING
+
+        constexpr auto function = "boost::math::landau_distribution<%1%>::landau_distribution";
+        RealType result = 0;
+        detail::check_location(function, l_location, &result, Policy());
+        detail::check_scale(function, l_scale, &result, Policy());
+
+        location_bias = -2 / constants::pi<RealType>() * log(l_scale);
+    } // landau_distribution
+
+    BOOST_MATH_GPU_ENABLED RealType location()const
+    {
+        return mu;
+    }
+    BOOST_MATH_GPU_ENABLED RealType scale()const
+    {
+        return c;
+    }
+    BOOST_MATH_GPU_ENABLED RealType bias()const
+    {
+        return location_bias;
+    }
+
+    private:
+    RealType mu;            // The location parameter.
+    RealType c;             // The scale parameter.
+    RealType location_bias; // = -2 / pi * log(c)
+};
+
+typedef landau_distribution<double> landau;
+
+#ifdef __cpp_deduction_guides
+template <class RealType>
+landau_distribution(RealType) -> landau_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+template <class RealType>
+landau_distribution(RealType, RealType) -> landau_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+#endif
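+
+// Editorial note: location_bias stores -2 / pi * log(c). The quantile, mode and
+// median implementations above return location + scale * (standard_value - bias),
+// i.e. the scale parameter c also shifts the distribution by (2 c / pi) * log(c);
+// this appears to follow the usual Landau convention, under which c * X is not
+// simply a rescaled copy of X but picks up a logarithmic shift in location.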
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const landau_distribution<RealType, Policy>&)
+{ // Range of permissible values for random variable x.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const landau_distribution<RealType, Policy>&)
+{ // Range of supported values for random variable x.
+    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const landau_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::landau_pdf_imp(dist, x);
+} // pdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const landau_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::landau_cdf_imp(dist, x, false);
+} // cdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const landau_distribution<RealType, Policy>& dist, const RealType& p)
+{
+    return detail::landau_quantile_imp(dist, p, false);
+} // quantile
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<landau_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::landau_cdf_imp(c.dist, c.param, true);
+} // cdf complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<landau_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::landau_quantile_imp(c.dist, c.param, true);
+} // quantile complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mean(const landau_distribution<RealType, Policy>&)
+{ // There is no mean:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Landau Distribution has no mean");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::mean(landau<%1%>&)",
+        "The Landau distribution does not have a mean: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType variance(const landau_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no variance:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Landau Distribution has no variance");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::variance(landau<%1%>&)",
+        "The Landau distribution does not have a variance: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mode(const landau_distribution<RealType, Policy>& dist)
+{
+    return detail::landau_mode_imp(dist);
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType median(const landau_distribution<RealType, Policy>& dist)
+{
+    return detail::landau_median_imp(dist);
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const landau_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no skewness:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Landau Distribution has no skewness");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::skewness(landau<%1%>&)",
+        "The Landau distribution does not have a skewness: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy()); // infinity?
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const landau_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no kurtosis:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Landau Distribution has no kurtosis");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::kurtosis(landau<%1%>&)",
+        "The Landau distribution does not have a kurtosis: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const landau_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no kurtosis excess:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Landau Distribution has no kurtosis excess");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::kurtosis_excess(landau<%1%>&)",
+        "The Landau distribution does not have a kurtosis excess: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const landau_distribution<RealType, Policy>& dist)
+{
+    return detail::landau_entropy_imp(dist);
+}
+
+}} // namespaces
+
+
+#endif // BOOST_STATS_LANDAU_HPP
diff --git a/include/boost/math/distributions/laplace.hpp b/include/boost/math/distributions/laplace.hpp
index 81ae8fed9d..81a0abe1ab 100644
--- a/include/boost/math/distributions/laplace.hpp
+++ b/include/boost/math/distributions/laplace.hpp
@@ -1,6 +1,7 @@
 // Copyright Thijs van den Berg, 2008.
 // Copyright John Maddock 2008.
 // Copyright Paul A. Bristow 2008, 2014.
+// Copyright Matt Borland 2024.
 
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
@@ -17,11 +18,15 @@
 #ifndef BOOST_STATS_LAPLACE_HPP
 #define BOOST_STATS_LAPLACE_HPP
 
+#include
+#include
+#include
 #include
 #include
 #include
 #include
-#include
+#include
+#include
 
 namespace boost{ namespace math{
@@ -43,7 +48,7 @@ class laplace_distribution
    // ----------------------------------
    // Constructor(s)
    // ----------------------------------
-   explicit laplace_distribution(RealType l_location = 0, RealType l_scale = 1)
+   BOOST_MATH_GPU_ENABLED explicit laplace_distribution(RealType l_location = 0, RealType l_scale = 1)
       : m_location(l_location), m_scale(l_scale)
    {
       RealType result;
@@ -55,17 +60,17 @@ class laplace_distribution
    // Public functions
    // ----------------------------------
 
-   RealType location() const
+   BOOST_MATH_GPU_ENABLED RealType location() const
    {
       return m_location;
    }
 
-   RealType scale() const
+   BOOST_MATH_GPU_ENABLED RealType scale() const
    {
       return m_scale;
    }
 
-   bool check_parameters(const char* function, RealType* result) const
+   BOOST_MATH_GPU_ENABLED bool check_parameters(const char* function, RealType* result) const
    {
       if(false == detail::check_scale(function, m_scale, result, Policy())) return false;
       if(false == detail::check_location(function, m_location, result, Policy())) return false;
@@ -91,42 +96,42 @@ laplace_distribution(RealType,RealType)->laplace_distribution<typename boost::math::tools::promote_args<RealType>::type>;
 
 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> range(const laplace_distribution<RealType, Policy>&)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const laplace_distribution<RealType, Policy>&)
 {
-   if (std::numeric_limits<RealType>::has_infinity)
+   BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
    { // Can use infinity.
-     return std::pair<RealType, RealType>(-std::numeric_limits<RealType>::infinity(), std::numeric_limits<RealType>::infinity()); // - to + infinity.
+ return boost::math::pair(-boost::math::numeric_limits::infinity(), boost::math::numeric_limits::infinity()); // - to + infinity. } else { // Can only use max_value. using boost::math::tools::max_value; - return std::pair(-max_value(), max_value()); // - to + max value. + return boost::math::pair(-max_value(), max_value()); // - to + max value. } } template -inline std::pair support(const laplace_distribution&) +BOOST_MATH_GPU_ENABLED inline boost::math::pair support(const laplace_distribution&) { - if (std::numeric_limits::has_infinity) + BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits::has_infinity) { // Can Use infinity. - return std::pair(-std::numeric_limits::infinity(), std::numeric_limits::infinity()); // - to + infinity. + return boost::math::pair(-boost::math::numeric_limits::infinity(), boost::math::numeric_limits::infinity()); // - to + infinity. } else { // Can only use max_value. using boost::math::tools::max_value; - return std::pair(-max_value(), max_value()); // - to + max value. + return boost::math::pair(-max_value(), max_value()); // - to + max value. } } template -inline RealType pdf(const laplace_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType pdf(const laplace_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions // Checking function argument RealType result = 0; - const char* function = "boost::math::pdf(const laplace_distribution<%1%>&, %1%))"; + constexpr auto function = "boost::math::pdf(const laplace_distribution<%1%>&, %1%))"; // Check scale and location. if (false == dist.check_parameters(function, &result)) return result; @@ -152,13 +157,13 @@ inline RealType pdf(const laplace_distribution& dist, const Re } // pdf template -inline RealType logpdf(const laplace_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType logpdf(const laplace_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions // Checking function argument - RealType result = -std::numeric_limits::infinity(); - const char* function = "boost::math::logpdf(const laplace_distribution<%1%>&, %1%))"; + RealType result = -boost::math::numeric_limits::infinity(); + constexpr auto function = "boost::math::logpdf(const laplace_distribution<%1%>&, %1%))"; // Check scale and location. if (false == dist.check_parameters(function, &result)) @@ -178,8 +183,8 @@ inline RealType logpdf(const laplace_distribution& dist, const const RealType mu = dist.scale(); const RealType b = dist.location(); - // if b is 0 avoid divde by 0 error - if(abs(b) < std::numeric_limits::epsilon()) + // if b is 0 avoid divide by 0 error + if(abs(b) < boost::math::numeric_limits::epsilon()) { result = log(pdf(dist, x)); } @@ -194,13 +199,13 @@ inline RealType logpdf(const laplace_distribution& dist, const } // logpdf template -inline RealType cdf(const laplace_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const laplace_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // For ADL of std functions. RealType result = 0; // Checking function argument. - const char* function = "boost::math::cdf(const laplace_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const laplace_distribution<%1%>&, %1%)"; // Check scale and location. 
if (false == dist.check_parameters(function, &result)) return result; @@ -228,13 +233,13 @@ inline RealType cdf(const laplace_distribution& dist, const Re } // cdf template -inline RealType logcdf(const laplace_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType logcdf(const laplace_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // For ADL of std functions. RealType result = 0; // Checking function argument. - const char* function = "boost::math::logcdf(const laplace_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::logcdf(const laplace_distribution<%1%>&, %1%)"; // Check scale and location. if (false == dist.check_parameters(function, &result)) { @@ -273,13 +278,13 @@ inline RealType logcdf(const laplace_distribution& dist, const } // logcdf template -inline RealType quantile(const laplace_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const laplace_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions. // Checking function argument RealType result = 0; - const char* function = "boost::math::quantile(const laplace_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const laplace_distribution<%1%>&, %1%)"; if (false == dist.check_parameters(function, &result)) return result; if(false == detail::check_probability(function, p, &result, Policy())) return result; @@ -311,7 +316,7 @@ inline RealType quantile(const laplace_distribution& dist, con template -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { // Calculate complement of cdf. BOOST_MATH_STD_USING // for ADL of std functions @@ -322,7 +327,7 @@ inline RealType cdf(const complemented2_type -inline RealType logcdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type, RealType>& c) { // Calculate complement of logcdf. BOOST_MATH_STD_USING // for ADL of std functions @@ -359,7 +364,7 @@ inline RealType logcdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions. 
@@ -400,17 +405,17 @@ inline RealType quantile(const complemented2_type::infinity(); + return boost::math::numeric_limits::infinity(); } if(q == 1) { - return -std::numeric_limits::infinity(); + return -boost::math::numeric_limits::infinity(); } if(false == detail::check_probability(function, q, &result, Policy())) return result; @@ -424,49 +429,49 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const laplace_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const laplace_distribution& dist) { return dist.location(); } template -inline RealType standard_deviation(const laplace_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType standard_deviation(const laplace_distribution& dist) { return constants::root_two() * dist.scale(); } template -inline RealType mode(const laplace_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const laplace_distribution& dist) { return dist.location(); } template -inline RealType median(const laplace_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType median(const laplace_distribution& dist) { return dist.location(); } template -inline RealType skewness(const laplace_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const laplace_distribution& /*dist*/) { return 0; } template -inline RealType kurtosis(const laplace_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const laplace_distribution& /*dist*/) { return 6; } template -inline RealType kurtosis_excess(const laplace_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const laplace_distribution& /*dist*/) { return 3; } template -inline RealType entropy(const laplace_distribution & dist) +BOOST_MATH_GPU_ENABLED inline RealType entropy(const laplace_distribution & dist) { using std::log; return log(2*dist.scale()*constants::e()); diff --git a/include/boost/math/distributions/logistic.hpp b/include/boost/math/distributions/logistic.hpp index d12de48c59..56dc6e9f2f 100644 --- a/include/boost/math/distributions/logistic.hpp +++ b/include/boost/math/distributions/logistic.hpp @@ -1,5 +1,5 @@ // Copyright 2008 Gautam Sewani -// +// Copyright 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt @@ -8,12 +8,17 @@ #ifndef BOOST_MATH_DISTRIBUTIONS_LOGISTIC #define BOOST_MATH_DISTRIBUTIONS_LOGISTIC +#include +#include +#include +#include #include #include #include #include #include -#include +#include +#include namespace boost { namespace math { @@ -24,22 +29,22 @@ namespace boost { namespace math { typedef RealType value_type; typedef Policy policy_type; - logistic_distribution(RealType l_location=0, RealType l_scale=1) // Constructor. + BOOST_MATH_GPU_ENABLED logistic_distribution(RealType l_location=0, RealType l_scale=1) // Constructor. : m_location(l_location), m_scale(l_scale) { - static const char* function = "boost::math::logistic_distribution<%1%>::logistic_distribution"; + constexpr auto function = "boost::math::logistic_distribution<%1%>::logistic_distribution"; RealType result; detail::check_scale(function, l_scale, &result, Policy()); detail::check_location(function, l_location, &result, Policy()); } // Accessor functions. 
- RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { return m_scale; } - RealType location()const + BOOST_MATH_GPU_ENABLED RealType location()const { return m_location; } @@ -60,26 +65,26 @@ namespace boost { namespace math { #endif template - inline const std::pair range(const logistic_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const logistic_distribution& /* dist */) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair( - std::numeric_limits::has_infinity ? -std::numeric_limits::infinity() : -max_value(), - std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : max_value()); + return boost::math::pair( + boost::math::numeric_limits::has_infinity ? -boost::math::numeric_limits::infinity() : -max_value(), + boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : max_value()); } template - inline const std::pair support(const logistic_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const logistic_distribution& /* dist */) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. using boost::math::tools::max_value; - return std::pair(-max_value(), max_value()); // - to + infinity + return boost::math::pair(-max_value(), max_value()); // - to + infinity } template - inline RealType pdf(const logistic_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType pdf(const logistic_distribution& dist, const RealType& x) { - static const char* function = "boost::math::pdf(const logistic_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const logistic_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType location = dist.location(); RealType result = 0; @@ -114,12 +119,12 @@ namespace boost { namespace math { } template - inline RealType cdf(const logistic_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const logistic_distribution& dist, const RealType& x) { RealType scale = dist.scale(); RealType location = dist.location(); RealType result = 0; // of checks. - static const char* function = "boost::math::cdf(const logistic_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const logistic_distribution<%1%>&, %1%)"; if(false == detail::check_scale(function, scale, &result, Policy())) { return result; @@ -149,12 +154,12 @@ namespace boost { namespace math { } template - inline RealType logcdf(const logistic_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType logcdf(const logistic_distribution& dist, const RealType& x) { RealType scale = dist.scale(); RealType location = dist.location(); RealType result = 0; // of checks. 
- static const char* function = "boost::math::cdf(const logistic_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const logistic_distribution<%1%>&, %1%)"; if(false == detail::check_scale(function, scale, &result, Policy())) { return result; @@ -192,13 +197,13 @@ namespace boost { namespace math { } template - inline RealType quantile(const logistic_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const logistic_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING RealType location = dist.location(); RealType scale = dist.scale(); - static const char* function = "boost::math::quantile(const logistic_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const logistic_distribution<%1%>&, %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) @@ -228,13 +233,13 @@ namespace boost { namespace math { } // RealType quantile(const logistic_distribution& dist, const RealType& p) template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING RealType location = c.dist.location(); RealType scale = c.dist.scale(); RealType x = c.param; - static const char* function = "boost::math::cdf(const complement(logistic_distribution<%1%>&), %1%)"; + constexpr auto function = "boost::math::cdf(const complement(logistic_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) @@ -263,13 +268,13 @@ namespace boost { namespace math { } template - inline RealType logcdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING RealType location = c.dist.location(); RealType scale = c.dist.scale(); RealType x = c.param; - static const char* function = "boost::math::cdf(const complement(logistic_distribution<%1%>&), %1%)"; + constexpr auto function = "boost::math::cdf(const complement(logistic_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) @@ -299,12 +304,12 @@ namespace boost { namespace math { } template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING RealType scale = c.dist.scale(); RealType location = c.dist.location(); - static const char* function = "boost::math::quantile(const complement(logistic_distribution<%1%>&), %1%)"; + constexpr auto function = "boost::math::quantile(const complement(logistic_distribution<%1%>&), %1%)"; RealType result = 0; if(false == detail::check_scale(function, scale, &result, Policy())) return result; @@ -335,13 +340,13 @@ namespace boost { namespace math { } template - inline RealType mean(const logistic_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const logistic_distribution& dist) { return dist.location(); } // RealType mean(const logistic_distribution& dist) template - inline RealType variance(const logistic_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const logistic_distribution& dist) { BOOST_MATH_STD_USING RealType scale = dist.scale(); @@ -349,36 +354,36 @@ namespace boost { namespace math { } // RealType variance(const logistic_distribution& dist) template - inline RealType mode(const logistic_distribution& dist) + 
BOOST_MATH_GPU_ENABLED inline RealType mode(const logistic_distribution& dist) { return dist.location(); } template - inline RealType median(const logistic_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType median(const logistic_distribution& dist) { return dist.location(); } template - inline RealType skewness(const logistic_distribution& /*dist*/) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const logistic_distribution& /*dist*/) { return 0; } // RealType skewness(const logistic_distribution& dist) template - inline RealType kurtosis_excess(const logistic_distribution& /*dist*/) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const logistic_distribution& /*dist*/) { return static_cast(6)/5; } // RealType kurtosis_excess(const logistic_distribution& dist) template - inline RealType kurtosis(const logistic_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const logistic_distribution& dist) { return kurtosis_excess(dist) + 3; } // RealType kurtosis_excess(const logistic_distribution& dist) template - inline RealType entropy(const logistic_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType entropy(const logistic_distribution& dist) { using std::log; return 2 + log(dist.scale()); diff --git a/include/boost/math/distributions/lognormal.hpp b/include/boost/math/distributions/lognormal.hpp index 3c8f576e56..dfc3e4b2a2 100644 --- a/include/boost/math/distributions/lognormal.hpp +++ b/include/boost/math/distributions/lognormal.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2006. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,12 +11,15 @@ // http://mathworld.wolfram.com/LogNormalDistribution.html // http://en.wikipedia.org/wiki/Lognormal_distribution +#include +#include +#include #include #include #include #include - -#include +#include +#include namespace boost{ namespace math { @@ -23,7 +27,7 @@ namespace detail { template - inline bool check_lognormal_x( + BOOST_MATH_GPU_ENABLED inline bool check_lognormal_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -48,7 +52,7 @@ class lognormal_distribution typedef RealType value_type; typedef Policy policy_type; - lognormal_distribution(RealType l_location = 0, RealType l_scale = 1) + BOOST_MATH_GPU_ENABLED lognormal_distribution(RealType l_location = 0, RealType l_scale = 1) : m_location(l_location), m_scale(l_scale) { RealType result; @@ -56,12 +60,12 @@ class lognormal_distribution detail::check_location("boost::math::lognormal_distribution<%1%>::lognormal_distribution", l_location, &result, Policy()); } - RealType location()const + BOOST_MATH_GPU_ENABLED RealType location()const { return m_location; } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { return m_scale; } @@ -83,29 +87,29 @@ lognormal_distribution(RealType,RealType)->lognormal_distribution -inline const std::pair range(const lognormal_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const lognormal_distribution& /*dist*/) { // Range of permissible values for random variable x is >0 to +infinity. 
using boost::math::tools::max_value; - return std::pair(static_cast(0), max_value()); + return boost::math::pair(static_cast(0), max_value()); } template -inline const std::pair support(const lognormal_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const lognormal_distribution& /*dist*/) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. using boost::math::tools::max_value; - return std::pair(static_cast(0), max_value()); + return boost::math::pair(static_cast(0), max_value()); } template -RealType pdf(const lognormal_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED RealType pdf(const lognormal_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions RealType mu = dist.location(); RealType sigma = dist.scale(); - static const char* function = "boost::math::pdf(const lognormal_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, sigma, &result, Policy())) @@ -129,11 +133,11 @@ RealType pdf(const lognormal_distribution& dist, const RealTyp } template -inline RealType cdf(const lognormal_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const lognormal_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const lognormal_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, dist.scale(), &result, Policy())) @@ -151,11 +155,11 @@ inline RealType cdf(const lognormal_distribution& dist, const } template -inline RealType quantile(const lognormal_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const lognormal_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const lognormal_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, dist.scale(), &result, Policy())) @@ -175,11 +179,11 @@ inline RealType quantile(const lognormal_distribution& dist, c } template -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const lognormal_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, c.dist.scale(), &result, Policy())) @@ -197,11 +201,11 @@ inline RealType cdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const lognormal_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const lognormal_distribution<%1%>&, %1%)"; RealType result = 0; if(0 == detail::check_scale(function, c.dist.scale(), &result, Policy())) @@ -221,7 +225,7 @@ inline RealType quantile(const 
@@ -221,7 +225,7 @@ inline RealType quantile(const complemented2_type<lognormal_distribution<RealType, Policy>, RealType>& c)
 }
 
 template <class RealType, class Policy>
-inline RealType mean(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
 
@@ -238,7 +242,7 @@ inline RealType mean(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType variance(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
 
@@ -255,7 +259,7 @@ inline RealType variance(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType mode(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
 
@@ -272,7 +276,7 @@ inline RealType mode(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType median(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType median(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
    RealType mu = dist.location();
@@ -280,7 +284,7 @@ inline RealType median(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType skewness(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
 
@@ -300,7 +304,7 @@ inline RealType skewness(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType kurtosis(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
 
@@ -318,7 +322,7 @@ inline RealType kurtosis(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const lognormal_distribution<RealType, Policy>& dist)
 {
    BOOST_MATH_STD_USING  // for ADL of std functions
 
@@ -336,9 +340,9 @@ inline RealType kurtosis_excess(const lognormal_distribution<RealType, Policy>& dist)
 }
 
 template <class RealType, class Policy>
-inline RealType entropy(const lognormal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const lognormal_distribution<RealType, Policy>& dist)
 {
-   using std::log;
+   BOOST_MATH_STD_USING
    RealType mu = dist.location();
    RealType sigma = dist.scale();
    return mu + log(constants::two_pi<RealType>()*constants::e<RealType>()*sigma*sigma)/2;
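What follows is an entirely new header implementing the map-Airy distribution. Its public interface is not shown in this excerpt, so the sketch below assumes it mirrors the other distributions in the library (two-parameter constructor with location defaulting to 0 and scale to 1, plus the usual free-function accessors):

#include <boost/math/distributions/mapairy.hpp>
#include <iostream>

int main()
{
    const boost::math::mapairy_distribution<double> dist(0.0, 1.0);
    // pdf() standardises x to u = (x - location) / scale and then forwards to
    // mapairy_pdf_plus_imp_prec for u >= 0 or mapairy_pdf_minus_imp_prec for
    // u < 0, with the precision variant chosen at compile time.
    std::cout << pdf(dist, 1.5) << '\n' << pdf(dist, -1.5) << '\n';
}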
diff --git a/include/boost/math/distributions/mapairy.hpp b/include/boost/math/distributions/mapairy.hpp
new file mode 100644
index 0000000000..8bf1f990c1
--- /dev/null
+++ b/include/boost/math/distributions/mapairy.hpp
@@ -0,0 +1,4220 @@
+// Copyright Takuma Yoshimura 2024.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_STATS_MAPAIRY_HPP
+#define BOOST_STATS_MAPAIRY_HPP
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4127) // conditional expression is constant
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifndef BOOST_MATH_HAS_NVRTC
+#include
+#include
+#endif
+
+namespace boost { namespace math {
+template <typename RealType = double, typename Policy = policies::policy<> >
+class mapairy_distribution;
+
+namespace detail {
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 3.7591e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.97516171847191855610e-1),
+            static_cast<RealType>(3.67488253628465083737e-2),
+            static_cast<RealType>(-9.73242224038828612673e-4),
+            static_cast<RealType>(2.32207514136635673061e-3),
+            static_cast<RealType>(5.69067907423210669037e-5),
+            static_cast<RealType>(-6.02637387141524535193e-5),
+            static_cast<RealType>(1.04960324426666933327e-5),
+            static_cast<RealType>(-6.58470237954242016920e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(7.09464351647314165710e-1),
+            static_cast<RealType>(3.66413036246461392316e-1),
+            static_cast<RealType>(1.10947882302862241488e-1),
+            static_cast<RealType>(2.65928486676817177159e-2),
+            static_cast<RealType>(3.75507284977386290874e-3),
+            static_cast<RealType>(4.03789594641339005785e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.5996e-20
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.06251243013238748252e-1),
+            static_cast<RealType>(1.38178831205785069108e-2),
+            static_cast<RealType>(4.19280374368049006206e-3),
+            static_cast<RealType>(8.54607219684690930289e-4),
+            static_cast<RealType>(-7.46881084120928210702e-5),
+            static_cast<RealType>(1.47110856483345063335e-5),
+            static_cast<RealType>(-1.30090180307471994500e-6),
+            static_cast<RealType>(5.24801123304330014713e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.10853683888611687140e-1),
+            static_cast<RealType>(3.89361261627717143905e-1),
+            static_cast<RealType>(1.15124062681082170577e-1),
+            static_cast<RealType>(2.38803416611949902468e-2),
+            static_cast<RealType>(3.08616898814509065071e-3),
+            static_cast<RealType>(2.43760043942846261876e-4),
+            static_cast<RealType>(1.34538901435238836768e-6),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.1592e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(5.33842514891989443409e-2),
+            static_cast<RealType>(1.23301980674903270971e-2),
+            static_cast<RealType>(3.45717831433988631923e-3),
+            static_cast<RealType>(3.27034449923176875761e-4),
+            static_cast<RealType>(1.20406794831890291348e-5),
+            static_cast<RealType>(5.77489170397965604669e-7),
+            static_cast<RealType>(-1.15255267205685159063e-7),
+            static_cast<RealType>(9.15896323073109992939e-9),
+            static_cast<RealType>(-3.14068002815368247985e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(9.08772985520393226044e-1),
+            static_cast<RealType>(4.26418573702560818267e-1),
+            static_cast<RealType>(1.22033746594868893316e-1),
+            static_cast<RealType>(2.27934009200310243172e-2),
+            static_cast<RealType>(2.60658999011198623962e-3),
+            static_cast<RealType>(1.54461660261435227768e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 9.2228e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(1.58950538583133457384e-2), + static_cast(7.47835440063141601948e-3), + static_cast(1.81137244353261478410e-3), + static_cast(2.26935565382135588558e-4), + static_cast(1.43877113825683795505e-5), + static_cast(2.08242747557417233626e-7), + static_cast(-1.54976465724771282989e-9), + static_cast(1.30762989300333026019e-11), + }; + BOOST_MATH_STATIC const RealType Q[8] = { + static_cast(1.), + static_cast(9.95505437381674174441e-1), + static_cast(4.58882737262511297099e-1), + static_cast(1.25031310192148865496e-1), + static_cast(2.15727229249904102247e-2), + static_cast(2.33597081566665672569e-3), + static_cast(1.45198998318300328562e-4), + static_cast(3.87962234445835345676e-6), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 1.0257e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(3.22517551525042172428e-3), + static_cast(1.12822806030796339659e-3), + static_cast(1.54489389961322571031e-4), + static_cast(9.28479992527909796427e-6), + static_cast(2.06168350199745832262e-7), + static_cast(9.05110751997021418539e-10), + static_cast(-2.15498112371756202097e-12), + static_cast(6.41838355699777435924e-15), + }; + BOOST_MATH_STATIC const RealType Q[8] = { + static_cast(1.), + static_cast(6.53390465399680164234e-1), + static_cast(1.82759048270449018482e-1), + static_cast(2.80407546367978533849e-2), + static_cast(2.50853443923476718145e-3), + static_cast(1.27671852825846245421e-4), + static_cast(3.28380135691060279203e-6), + static_cast(3.06545317089055335742e-8), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 6.0510e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(5.82527663232857270992e-4), + static_cast(6.89502117025124630567e-5), + static_cast(2.24909795087265741433e-6), + static_cast(2.18576787334972903790e-8), + static_cast(3.39014723444178274435e-11), + static_cast(-9.74481309265612390297e-15), + static_cast(-1.13308546492906818388e-16), + static_cast(5.32472028720777735712e-19), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(2.74018883667663396766e-1), + static_cast(2.95901195665990089660e-2), + static_cast(1.57901733512147920251e-3), + static_cast(4.24965124147621236633e-5), + static_cast(5.17522027193205842016e-7), + static_cast(2.00522219276570039934e-9), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 7.3294e-18 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(1.03264853379349880039e-4), + static_cast(5.35256306644392405447e-6), + static_cast(9.00657716972118816692e-8), + static_cast(5.34913574042209793720e-10), + static_cast(6.70752605041678779380e-13), + static_cast(-5.30089923101856817552e-16), + static_cast(7.28133811621687143754e-19), + static_cast(-7.38047553655951666420e-22), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.29920843258164337377e-1), + static_cast(6.75018577147646502386e-3), + static_cast(1.77694968039695671819e-4), + static_cast(2.46428299911920942946e-6), + 
static_cast(1.67165053157990942546e-8), + static_cast(4.19496974141131087116e-11), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + RealType t = 1 / sqrt(x * x * x); + + // Rational Approximation + // Maximum Relative Error: 5.6693e-20 + BOOST_MATH_STATIC const RealType P[5] = { + static_cast(5.98413420602149016910e-1), + static_cast(3.14584075817417883086e-5), + static_cast(1.62977928311793051895e1), + static_cast(-4.12903117172994371875e-4), + static_cast(-1.06404478702135751872e2), + }; + BOOST_MATH_STATIC const RealType Q[3] = { + static_cast(1.), + static_cast(5.25696892802060720079e-5), + static_cast(4.03600055498020483920e1), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t / x; + } + + return result; +} + + +template +BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (x < 1) { + // Rational Approximation + // Maximum Relative Error: 7.8308e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97516171847191855609649452292217911973e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17531822787252717270400174744562144891e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.85115358761409188259685286269086053296e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18029395189535552537870932989876189597e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77412874842522285996566741532939343827e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.77992070255086842672551073580133785334e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.54573264286260796576738952968288691782e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.94764012694602906119831079380500255557e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.97596258932025712802674070104281981323e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45466169112247379589927514614067756956e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.99760415118300349769641418430273526815e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43150486566834492207695241913522311930e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46130347604880355784938321408765318948e-13), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11845869711743584628289654085905424438e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.80391154854347711297249357734993136108e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.75628443538173255184583966965162835227e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41016303833742742212624596040074202424e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.19142300833563644046500846364541891138e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02421707708633106515934651956262614532e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.03973732602338507411104824853671547615e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.35206168908201402570766383018708660819e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.38602606623008690327520130558254165564e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53740175911385378188372963739884519312e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27513004715414297729539702862351044344e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54510493017251997793679126704007098265e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x); + } + else if (x < 2) { + 
RealType t = x - 1; + + // Rational Approximation + // Maximum Relative Error: 3.0723e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06251243013238748252181151646220197947e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.92438638323563234519452281479338921158e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83335793178622701784730867677919844599e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.84159075203218824591724451142550478306e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04213732090358859917896442076931334722e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.72388220651785798237487005913708387756e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.36099324022668533012286817710272936865e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74483270731217433628720245792741986795e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.56461597064783966758904403291149549559e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.28590608939674970691948223694855264817e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.81756745849477762773082030302943341729e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.65915115243311285178083515017249358853e-12), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33250387018216706082200927591739589024e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.71707718560216685629188467984384070512e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.81316277289673837399162302797006618384e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78475951599121894570443981591530879087e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.16167801098514576400689883575304687623e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19167794366424137722223009369062644830e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.20831082064982892777497773490792080382e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27196399162146247210036306870401328410e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79335434374966775903734846875100087590e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30825409557870847168672662674521614782e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97296173230649275943984471731360073540e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.48943057909563158917114503727080517958e-9), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 4) { + RealType t = x - 2; + + // Rational Approximation + // Maximum Relative Error: 4.0903e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.33842514891989443409465171800884519331e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53264053296761245408991932692426094424e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23210520807186629205810670362048049836e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.71104271443590027208545022968625306496e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98781446716778138729774954595209697813e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98895829308616657174932023565302947632e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.25993639218721804661037829873135732687e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.64669776700609853276056375742089715662e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.11846243382610611156151291892877027869e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.74830086064868141326053648144496072795e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
2.07549997153431643849551871367000763445e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.10030596535721362628619523622308581344e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19376016170255697546854583591494809062e-13), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52686177278870816414637961315363468426e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19872083945442288336636376283295310445e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.26633866969676511944680471882188527224e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41261867539396133951024374504099977090e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.18852182132645783844766153200510014113e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70152126044106007357033814742158353948e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.23810508827493234517751339979902448944e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.96161313274648769113605163816403306967e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.06693316156193327359541953619174255726e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.79366356086062616343285660797389238271e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.14585835815353770175366834099001313472e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05314631662369743547568064896403143693e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.90325380271096603676911761784650800378e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.36933359079566550212098911224675011839e-12), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 8) { + RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 6.5015e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58950538583133457383574346194006716984e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.25447644411503971725638816502617490834e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47605882774114100209665040117276675598e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.12224864838900383464124716266085521485e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79164249640537972514574059182421325541e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.89668438166714230032406615413991628135e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.44410389750700463263686630222653669837e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.14788978994687095829140113472609739982e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.79821680629333600844514042061772236495e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.49636960435731257154960798035854124639e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70554745768928821263556963261516872171e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.42293994855343109617040824208078534205e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37599287094703195312894833570340165019e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.35248179978735448062307216459232932068e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.53569375838863862590910010617140120876e-18), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94337325681904859647161946168957959628e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77120402023938328899162557073347121463e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01644685191130734907530007424741314392e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
4.12479655123720440909164080517207084404e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25556010526357752360439314019567992245e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.96143273204038192262150849394970544022e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.50612932318889495209230176354364299236e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.12160918304376427109905628326638480473e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.47696044292604039527013647985997661762e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.64067652576843720823459199100800335854e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.00745166063635113130434111509648306420e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05398901239421768403763864060147286105e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05698259572340563109985785513355912114e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19362835269415404005406782719825077472e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15444386779802728200716489787161419304e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02452666470008756043350040893761339083e-16), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 2.0995e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[16] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.22517551525042172427941302520759668293e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.86576974828476461442549217748945498966e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18419822818191546598384139622512477000e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98396184944524020019688823190946146641e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.06686400532599396487775148973665625687e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.05680178109228687159829475615095925679e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.17554487015345146749705505971350254902e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14774751685364429557883242232797329274e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33266124509168360207594600356349282805e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76332756800842989348756910429214676252e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.60639771339252642992277508068105926919e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.41859490403554144799385471141184829903e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77177795293424055655391515546880774987e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76106923344461402353501262620681801053e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.68829978902134103249656805130103045021e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42496376687241918803028631991083570963e-26), + }; + BOOST_MATH_STATIC const RealType Q[16] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19213376162053391168605415200906099633e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.65578261958732385181558047087365997878e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30046653564394292929001223763106276016e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.48388301731958697028701215596777178117e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.50873786049439122933188684993719288258e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23255654647151798865208394342856435797e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20861791399969402003082323686080041040e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
8.96882049090731653763684812243275884213e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.98669985741073085290012296575736698103e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.03383311816835346577432387682379226740e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.87320682938150375144724980774245810905e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13573468677076838075146150847170057373e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34526045003716422620879156626237175127e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.35681579117696161282979297336282783473e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92944288060269290125987728528698476197e-18), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 2.0937e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.82527663232857270992129793621400616909e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41696401156754081476312871174198295322e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.42036620449365724707919875710197564857e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.67076745288708619632303078677641380627e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14278954094278648593125010577441869646e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40092485054621853149602511539550254471e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.17755660009065973828053533035808718033e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.23871371557251644837598540542648782066e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04069998646037977439620128812310273053e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.94055978349016208777803296823455779097e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29866428982892883091537921429389750973e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.06056281963023929277728535486590256573e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57963857545037466186123981516026589992e-24), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.81390322233700529779563477285232205886e-28), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52190981930441828041102818178755246228e-31), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.70564782441895707961338319466546005093e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47770566490107388849474183308889339231e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29364672385303439788399215507370006639e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.37279274083988250795581105436675097881e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72124151284421794872333348562536468054e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.96970247774973902625712414297788402746e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.38395055453444011915661055983937917120e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.19605460410208704830882138883730331113e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76945301389475508747530234950023648137e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33624384932503964160642677987886086890e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01155130710615988897664213446593907596e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03959317021567084067518847978890548086e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78213669817351488671519066803835958715e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
2.75492332026736176991870807903277324902e-22), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 1.5856e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03264853379349880038687006045193401399e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79539964604630527636184900467871907171e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34840369549460790638336121351837912308e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.73087351972154879439617719914590729748e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51775493325347153520115736204545037264e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.60104651860674451546102708885530128768e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.90233449697112559539826150932808197444e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06978852724410115655105118663137681992e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.00399855296672416041126220131900937128e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.18139748830278263202087699889457673035e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43070756487288399784700274808326343543e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70126687893706466023887757573369648552e-27), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.29405234560873665664952418690159194840e-30), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.69069082510020066864633718082941688708e-34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.33468198065176301137949068264633336529e-37), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51951069241510130465691156908893803280e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84647597299970149588010858770320631739e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90239396588176334117512714878489376365e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.35551585337774834346900776840459179841e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53375746264539501168763602838029023222e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.42421935941736734247914078641324315900e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.23835501607741697737129504173606231513e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.79603272375172813955236187874231935324e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.44624821303153251954931367754173356213e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.10635081308984534416704147448323126303e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.14627867347129520510628554651739571006e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43792928765659831045040802615903432044e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.79856365207259871336606847582889916798e-25), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + RealType t = 1 / sqrt(x * x * x); + + // Rational Approximation + // Maximum Relative Error: 3.5081e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[8] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.98413420602149016909919089901572802714e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30303835860684077803651094768293625633e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.89097726237252419724261295392691855545e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.12696604472230480273239741428914666511e1), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84517621403071494824886152940942995151e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.67577378292168927009421205756730205227e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.16343347002845084264982358165052437094e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.59558963351172885545760841064831356701e3), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.51965956124978480521462518750569617550e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61700833299761977287211297600922591853e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.94988298508869748383898344668918510537e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.52494213749069142804725453333400335525e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20093079283917759611690534481918040882e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.82564796242972192725215815897475246715e4), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t / x; + } + + return result; +} + +template +BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (x >= -1) { + RealType t = x + 1; + + // Rational Approximation + // Maximum Relative Error: 3.7525e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(2.76859868856746781256e-1), + static_cast(1.10489814676299003241e-1), + static_cast(-6.25690643488236678667e-3), + static_cast(-1.17905420222527577236e-3), + static_cast(1.27188963720084274122e-3), + static_cast(-7.20575105181207907889e-5), + static_cast(-2.22575633858411851032e-5), + static_cast(2.94270091008508492304e-6), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(4.98673671503410894284e-1), + static_cast(3.15907666864554716291e-1), + static_cast(8.34463558393629855977e-2), + static_cast(2.71804643993972494173e-2), + static_cast(3.52187050938036578406e-3), + static_cast(7.03072974279509263844e-4), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -2) { + RealType t = x + 2; + + // Rational Approximation + // Maximum Relative Error: 4.0995e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(2.14483832832989822788e-1), + static_cast(3.72789690317712876663e-1), + static_cast(1.86473650057086284496e-1), + static_cast(1.31182724166379598907e-2), + static_cast(-9.00695064809774432392e-3), + static_cast(3.46884420664996747052e-4), + static_cast(4.88651392754189961173e-4), + static_cast(-6.13516242712196835055e-5), + }; + BOOST_MATH_STATIC const RealType Q[8] = { + static_cast(1.), + static_cast(1.06478618107122200489e0), + static_cast(4.08809060854459518663e-1), + static_cast(2.66617598099501800866e-1), + static_cast(4.53526315786051807494e-2), + static_cast(2.44078693689626940834e-2), + static_cast(1.52822572478697831870e-3), + static_cast(8.69480001029742502197e-4), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + RealType s = exp(2 * x * x * x / 27) * sqrt(-x); + + if (x >= -4) { + RealType t = -x - 2; + + // Rational Approximation + // Maximum Relative Error: 2.4768e-18 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(2.74308494787955998605e-1), + static_cast(4.87765991440983416392e-1), + static_cast(3.84524365110270427617e-1), + static_cast(1.77409497505926097339e-1), + 
static_cast(5.25612864287310961520e-2), + static_cast(1.01528615034079765421e-2), + static_cast(1.20417225696161842090e-3), + static_cast(6.97462693097107007719e-5), + }; + BOOST_MATH_STATIC const RealType Q[8] = { + static_cast(1.), + static_cast(1.81256903248465876424e0), + static_cast(1.43959302060852067876e0), + static_cast(6.65882284117861804351e-1), + static_cast(1.97537712781845593211e-1), + static_cast(3.81732970028510912201e-2), + static_cast(4.52767489928026542226e-3), + static_cast(2.62240194911920120003e-4), + }; + + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -8) { + RealType t = -x - 4; + + // Rational Approximation + // Maximum Relative Error: 1.5741e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(2.67391547707456587286e-1), + static_cast(3.39319035621314371924e-1), + static_cast(1.85434799940724207230e-1), + static_cast(5.63667456320679857693e-2), + static_cast(1.01231164548944177474e-2), + static_cast(1.02501575174439362864e-3), + static_cast(4.60769537123286016400e-5), + static_cast(-4.92754650783224582641e-13), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.27271216837333318516e0), + static_cast(6.96551952883867277759e-1), + static_cast(2.11871363524516350422e-1), + static_cast(3.80622887806509632537e-2), + static_cast(3.85400280812991562328e-3), + static_cast(1.73246593953823694311e-4), + }; + + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -16) { + RealType t = -x - 8; + + // Rational Approximation + // Maximum Relative Error: 4.6579e-17 + BOOST_MATH_STATIC const RealType P[6] = { + static_cast(2.66153901932100301337e-1), + static_cast(1.65767350677458230714e-1), + static_cast(4.19801402197670061146e-2), + static_cast(5.39337995172784579558e-3), + static_cast(3.50811247702301287586e-4), + static_cast(9.21758454778883157515e-6), + }; + BOOST_MATH_STATIC const RealType Q[6] = { + static_cast(1.), + static_cast(6.23092941554668369107e-1), + static_cast(1.57829914506366827914e-1), + static_cast(2.02787979758160988615e-2), + static_cast(1.31903008994475216511e-3), + static_cast(3.46575870637847438219e-5), + }; + + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -32) { + RealType t = -x - 16; + + // Rational Approximation + // Maximum Relative Error: 5.2014e-17 + BOOST_MATH_STATIC const RealType P[5] = { + static_cast(2.65985830928929730672e-1), + static_cast(7.19655029633308583205e-2), + static_cast(7.26293125679558421946e-3), + static_cast(3.24276402295343802262e-4), + static_cast(5.40508013573989841127e-6), + }; + BOOST_MATH_STATIC const RealType Q[5] = { + static_cast(1.), + static_cast(2.70578525590448009961e-1), + static_cast(2.73082032706004833847e-2), + static_cast(1.21926059813954504560e-3), + static_cast(2.03227900426552177849e-5), + }; + + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + result = 0; + } + } + + return result; +} + +template +BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (x >= -1) { + RealType t = x + 1; + + // Rational Approximation + // Maximum Relative Error: 5.2870e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76859868856746781256050397658493368372e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 
113, 2.13037642242224438972685982606987140111e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.93206268361082760254653961897373271146e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12844418906916902333116398594921450782e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36889326770180267250286619759335338794e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.95272615884641416804001553871108995422e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.53808638264746233799776679481568171506e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.92177790427881393122479399837010657693e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93492737815019893169693306410980499366e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.87510085148730083683110532987841223544e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.28469424017979299382094276157986775969e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83693904015623816528442886551032709693e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.77632857558257155545506847333166147492e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00448215148716947837105979735199471601e-11), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.69069814466926608209872727645156315374e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.89657828158127300370734997707096744077e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62713433978940724622996782534485162816e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.91600878366366974062522408704458777166e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.89144035500328704769924414014440238441e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.35263616916053275381069097012458200491e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49136684724986851824746531490006769036e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65912003138912073317982729161392623277e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.65931144405541620572732754508534372034e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40193555853535182510951061797573338442e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.43625211359756249232841566256877823039e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33207781577559817130740123609636060998e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -2) { + RealType t = x + 2; + + // Rational Approximation + // Maximum Relative Error: 1.1977e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14483832832989822788477500521594411868e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75657192307644021285091474845448102656e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40437358470633234235031852091608646844e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66609942512054705023295445270747546208e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.54563774151184610728476161049657676321e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51479057544157089574005315379453615537e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.59853789372610909788599341307719626846e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.76919062715142378209907670793921883406e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.58572738466179822770103948740437237476e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.66618046393835590932491510543557226290e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.26253044828460469263564567571249315188e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.11130363073235247786909976446790746902e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.49023728251751416730708805268921994420e-10), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.11919889346080886194925406930280687022e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.99082771425048574611745923487528183522e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99525320878512488641033584061027063035e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.20775109959302182467696345673111724657e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67505804311611026128557007926613964162e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.77854913919309273628222660024596583623e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91661599559554233157812211199256222756e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.83924945472605861063053622956144354568e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.84286353909650034923710426843028632590e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57737060659799463556626420070111210218e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76047305116625604109657617040360402976e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.86975509621224474718728318687795215895e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71646204381423826495116781730719271111e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30359141441663007574346497273327240071e-9), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + RealType s = exp(2 * x * x * x / 27) * sqrt(-x); + + if (x >= -4) { + RealType t = -x - 2; + + // Rational Approximation + // Maximum Relative Error: 5.4547e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74308494787955998605105974174143750745e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.56767876568276519015214709629156760546e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23402577465454790961498400214198520261e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09577559351834952074671208183548972395e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.76209118910349927892265971592071407626e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.09368637728788364895148841703533651597e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09003822946777310058789032386408519829e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02362804210869367995322279203786166303e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.67210045349462046966360849113168808620e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17170437120510484976042000272825166724e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62068279517157268391045945672600042900e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72238125522303876741011786930129571553e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33906175951716762094473406744654874848e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.88118741063309731598638313174835288433e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78908322579081615215057968216358892954e-9), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15777668058369565739250784347385217839e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.58275582332060589146223977924181161908e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08890987062755381429904193744273374370e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53062680969750921573862970262146744660e0), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15983695707064161504470525373678920004e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.09120624447001177857109399158887656977e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13566107440776375294261717406754395407e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50716565210262652091950832287627406780e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40417354541359829249609883808591989082e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.09285589734746898623782466689035549135e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.47580156629757526370271002425784456931e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.03479533688660179064728081632921439825e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.58728676819719406366664644282113323077e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72685000369623096389026353785111272994e-9), + }; + // LCOV_EXCL_STOP + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -8) { + RealType t = -x - 4; + + // Rational Approximation + // Maximum Relative Error: 1.8813e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67391547707456587286086623414017962238e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.69944730920904699720804320295067934914e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.80384452804523880914883464295008532437e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.74832028145199140240423863864148009059e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71728522451977382202061046054643165624e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.91023495084678296967637417245526177858e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57730498044529764612538979048001166775e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31940820074475947691746555183209863058e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.54175805821840981842505041345112198286e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.31350452337838677820161124238784043790e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.52175993144502511705213771924810467309e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.85684239411667243910736588216628677445e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27124210379062272403030391492854565008e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17645475312219452046348851569796494059e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06306499345515479193219487228315566344e-11), + }; + BOOST_MATH_STATIC const RealType Q[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13521398369589479131299586715604029947e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17680254721938920978999949995837883884e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40693619288419980101309080614788657638e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.44930162913500531579305526795523256972e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22044272115074113804712893993125987243e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.92745159832354238503828226333417152767e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24766164774700476810039401793119553409e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08325637569571782180723187639357833929e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74954547353553788519997212700557196088e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82800744682204649977844278025855329390e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.20210992299988298543034791887173754015e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 
113, 1.22996819257926038785424888617824130286e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.42340212199922656577943251139931264313e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.75700556749505163188370496864513941614e-11), + }; + // LCOV_EXCL_STOP + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -16) { + RealType t = -x - 8; + + // Rational Approximation + // Maximum Relative Error: 3.7501e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66153901932100301337118653561328446399e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.52542079386371212946566450189144670788e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.17560936304516198261138159102435548430e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.26792904240601626330507068992045446428e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15418212265160879313643948347460896640e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.05247220687656529725024232836519908641e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.64228534097787946289779529645800775231e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.85634097697132464418223150629017524118e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.49585420710073223183176764488210189671e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.48871040740917898530270248991342594461e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.42577266655992039477272273926475476183e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19214263302271253341410568192952269518e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36635313919771528255819112450043338510e-12), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32484755553196872705775494679365596205e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.17714315014480774542066462899317631393e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.10789882607024692577764888497624620277e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09821963157449764169644456445120769215e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52354198870000121894280965999352991441e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12133327236256081067100384182795121111e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.20187894923874357333806454001674518211e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69039238999927049119096278882765161803e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.35737444680219098802811205475695127060e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.54403624143647064402264521374546365073e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.24233005893817070145949404296998119469e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.89735152971223120087721392400123727326e-12), + }; + // LCOV_EXCL_STOP + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -32) { + RealType t = -x - 16; + + // Rational Approximation + // Maximum Relative Error: 9.2696e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[12] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65985830928929730672052407058361701971e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.80409998734303497641108024806388734755e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.49286120625421787109350223436127409819e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.89160491404149422833016337592047445082e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16725811789351893632348469796802834008e-3), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.43438517439595919021069131504449842238e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.29058184637190638359623120253986595623e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.03288592271246432030980385908922413497e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.12286831076824535034975676306286388291e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.64563161552001551475186730009447111173e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13183856815615371136129883169639301710e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.02405342795439598418033139109649640085e-35), + }; + BOOST_MATH_STATIC const RealType Q[11] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.78286317363568496229516074305435186276e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06519013547074134846431611115576250187e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.71907733798006110542919988654989891098e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.38874744033460851257697736304200953873e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.54724289412996188575775800547576856966e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98922099980447626797646560786207812928e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.64352676367403443733555974471752023206e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92616898324742524009679754162620171773e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87471345773127482399498877510153906820e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92954174836731254818376396170511443820e-12), + }; + // LCOV_EXCL_STOP + result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x >= -64) { + RealType t = -x - 32; + + // Rational Approximation + // Maximum Relative Error: 2.3524e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[10] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65964563346442080104568381680822923977e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.77958685324702990033291591478515962894e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.56419338083136866686699803771820491401e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.82465178504003399087279098324316458608e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92402911374159755476910533154145918079e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.91224450962405933321548581824712789516e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.84063939469145970625490205194192347630e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.15300528698702940691774461674788639801e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.85553643603397817535280932672322232325e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.46207029637607033398822620480584537642e-38), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54906717312241693103173902792310528801e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84408124581401290943345932332007045483e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81403744024723164669745491417804917709e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.23423244618880845765135047598258754409e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.84697524433421334697753031272973192290e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.94803525968789587050040294764458613062e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68948879514200831687856703804327184420e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07366525547027105672618224029122809899e-12), + }; + // 
LCOV_EXCL_STOP
+        result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        result = 0;
+    }
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53> &tag) {
+    if (x >= 0) {
+        return mapairy_pdf_plus_imp_prec(x, tag);
+    }
+    else if (x <= 0) {
+        return mapairy_pdf_minus_imp_prec(x, tag);
+    }
+    else { // x is NaN: fails both comparisons above
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>& tag) {
+    if (x >= 0) {
+        return mapairy_pdf_plus_imp_prec(x, tag);
+    }
+    else if (x <= 0) {
+        return mapairy_pdf_minus_imp_prec(x, tag);
+    }
+    else { // x is NaN: fails both comparisons above
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_imp(const mapairy_distribution<RealType, Policy>& dist, const RealType& x) {
+    //
+    // This calculates the pdf of the Map-Airy distribution.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::pdf(mapairy<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    RealType u = (x - location) / scale;
+
+    result = mapairy_pdf_imp_prec(u, tag_type()) / scale;
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 2.9194e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(3.33333333333333333333e-1),
+            static_cast<RealType>(7.49532137610545010591e-2),
+            static_cast<RealType>(9.25326921848155048716e-3),
+            static_cast<RealType>(6.59133092365796208900e-3),
+            static_cast<RealType>(-5.21942678326323374113e-4),
+            static_cast<RealType>(8.22766804917461941348e-5),
+            static_cast<RealType>(-3.97941251650023182117e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.17408156824742736411e-1),
+            static_cast<RealType>(3.57041011418415988268e-1),
+            static_cast<RealType>(1.04580353775369716002e-1),
+            static_cast<RealType>(1.87521616934129432292e-2),
+            static_cast<RealType>(2.33232161135637085535e-3),
+            static_cast<RealType>(7.31285352607895467310e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.1531e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(1.84196970581015939888e-1),
+            static_cast<RealType>(-1.19398028299089933853e-3),
+            static_cast<RealType>(1.21954054797949597854e-2),
+            static_cast<RealType>(-9.37912675685073154845e-4),
+            static_cast<RealType>(1.66651954077980453212e-4),
+            static_cast<RealType>(-1.33271812303025233648e-5),
+            static_cast<RealType>(5.35982226125013888796e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53> &tag) {
+    if (x >= 0) {
+        return mapairy_pdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return mapairy_pdf_minus_imp_prec<RealType>(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>& tag) {
+    if (x >= 0) {
+        return mapairy_pdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return mapairy_pdf_minus_imp_prec<RealType>(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_pdf_imp(const mapairy_distribution<RealType, Policy>& dist, const RealType& x) {
+    //
+    // This calculates the pdf of the Map-Airy distribution.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::pdf(mapairy<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (i.e. 128-bit quad-floats)");
+
+    RealType u = (x - location) / scale;
+
+    result = mapairy_pdf_imp_prec(u, tag_type()) / scale;
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 2.9194e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(3.33333333333333333333e-1),
+            static_cast<RealType>(7.49532137610545010591e-2),
+            static_cast<RealType>(9.25326921848155048716e-3),
+            static_cast<RealType>(6.59133092365796208900e-3),
+            static_cast<RealType>(-5.21942678326323374113e-4),
+            static_cast<RealType>(8.22766804917461941348e-5),
+            static_cast<RealType>(-3.97941251650023182117e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.17408156824742736411e-1),
+            static_cast<RealType>(3.57041011418415988268e-1),
+            static_cast<RealType>(1.04580353775369716002e-1),
+            static_cast<RealType>(1.87521616934129432292e-2),
+            static_cast<RealType>(2.33232161135637085535e-3),
+            static_cast<RealType>(7.31285352607895467310e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.1531e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(1.84196970581015939888e-1),
+            static_cast<RealType>(-1.19398028299089933853e-3),
+            static_cast<RealType>(1.21954054797949597854e-2),
+            static_cast<RealType>(-9.37912675685073154845e-4),
+            static_cast<RealType>(1.66651954077980453212e-4),
+            static_cast<RealType>(-1.33271812303025233648e-5),
+            static_cast<RealType>(5.35982226125013888796e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.70352826101668448273e-1),
+            static_cast<RealType>(1.98852010141232271304e-1),
+            static_cast<RealType>(3.64864882318453496161e-2),
+            static_cast<RealType>(4.22173125405065522298e-3),
+            static_cast<RealType>(1.20079284386796600356e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.8348e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.07409273397524124098e-1),
+            static_cast<RealType>(3.83900318969331880402e-2),
+            static_cast<RealType>(1.17926652359826576790e-2),
+            static_cast<RealType>(1.52181625871479030046e-3),
+            static_cast<RealType>(1.50703424417132565662e-4),
+            static_cast<RealType>(2.10117959279448106308e-6),
+            static_cast<RealType>(1.97360985832285866640e-8),
+            static_cast<RealType>(-1.06076300080048408251e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.54435380513870673497e-1),
+            static_cast<RealType>(3.66021233157880878411e-1),
+            static_cast<RealType>(9.42985570806905160687e-2),
+            static_cast<RealType>(1.54122343653998564507e-2),
+            static_cast<RealType>(1.49849056258932455548e-3),
+            static_cast<RealType>(6.94290406268856211707e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.6624e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(4.70720199535228802538e-2),
+            static_cast<RealType>(2.67200763833749070079e-2),
+            static_cast<RealType>(7.37400551855064729769e-3),
+            static_cast<RealType>(1.10592441765001623699e-3),
+            static_cast<RealType>(9.15846028547400212588e-5),
+            static_cast<RealType>(3.17801522553862136789e-6),
+            static_cast<RealType>(2.03102753319827713542e-8),
+            static_cast<RealType>(-5.16172854149066643529e-11),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(9.05317644829451086870e-1),
+            static_cast<RealType>(3.73713496637025562492e-1),
+            static_cast<RealType>(8.94434672792094976627e-2),
+            static_cast<RealType>(1.31846542255347106087e-2),
+            static_cast<RealType>(1.16680596342421447100e-3),
+            static_cast<RealType>(5.44719256441278863300e-5),
+            static_cast<RealType>(8.73131209154185067287e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.6243e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.74847564444513000450e-2),
+            static_cast<RealType>(6.00209162595027323742e-3),
+            static_cast<RealType>(7.86550260761375576075e-4),
+            static_cast<RealType>(4.46682547335758521734e-5),
+            static_cast<RealType>(9.51329761417139273391e-7),
+            static_cast<RealType>(4.10313065114362712333e-9),
+            static_cast<RealType>(-9.81286503831545640189e-12),
+            static_cast<RealType>(2.98763969872672156104e-14),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.27732094554221674504e-1),
+            static_cast<RealType>(1.14330643482604301178e-1),
+            static_cast<RealType>(1.27722341942374066265e-2),
+            static_cast<RealType>(7.54563340152441778517e-4),
+            static_cast<RealType>(2.13377039814057925832e-5),
+            static_cast<RealType>(2.09670987094350618690e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.4684e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(6.22684103170563193015e-3),
+            static_cast<RealType>(1.34714356588780958096e-3),
+            static_cast<RealType>(9.51289465377874891896e-5),
+            static_cast<RealType>(2.64918464474843134081e-6),
+            static_cast<RealType>(2.66703857491046795285e-8),
+            static_cast<RealType>(5.42037888457985833156e-11),
+            static_cast<RealType>(-6.18017115447736427379e-14),
+            static_cast<RealType>(9.11626234402148561268e-17),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.09895694991285975774e-1),
+            static_cast<RealType>(3.69874670435930773471e-2),
+            static_cast<RealType>(2.15708854325146400153e-3),
+            static_cast<RealType>(6.35345408451056881884e-5),
+            static_cast<RealType>(8.65722805575670770555e-7),
+            static_cast<RealType>(4.03153189557220023202e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.5947e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(2.20357145727036120652e-3),
+            static_cast<RealType>(1.45412555771401325111e-4),
+            static_cast<RealType>(3.27819006009093198652e-6),
+            static_cast<RealType>(2.96786786716623870006e-8),
+            static_cast<RealType>(9.54192199129339742308e-11),
+            static_cast<RealType>(5.71421706870777687254e-14),
+            static_cast<RealType>(-1.48321866072033823195e-17),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.12851983233980279746e-1),
+            static_cast<RealType>(4.94650928817638043712e-3),
+            static_cast<RealType>(1.05447405092956497114e-4),
+            static_cast<RealType>(1.11578464291338271178e-6),
+            static_cast<RealType>(5.27522295397347842625e-9),
+            static_cast<RealType>(7.95786524903707645399e-12),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
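+    // Beyond x = 64 the survival function is expanded in t = x^(-3/2); the
+    // isnormal() check routes very large x through pow(sqrt(x), 3) so that
+    // x * x * x cannot overflow before the reciprocal square root is taken.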
+    else {
+        RealType x_cube = x * x * x;
+        RealType t = static_cast<RealType>((boost::math::isnormal)(x_cube) ? 1 / sqrt(x_cube) : 1 / pow(sqrt(x), 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.2709e-17
+        BOOST_MATH_STATIC const RealType P[4] = {
+            static_cast<RealType>(3.98942280401432677940e-1),
+            static_cast<RealType>(2.89752186412133782995e-2),
+            static_cast<RealType>(4.67360459917040710474e0),
+            static_cast<RealType>(-1.26770824563800250704e-1),
+        };
+        BOOST_MATH_STATIC const RealType Q[3] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(7.26301023103568827709e-2),
+            static_cast<RealType>(1.60899894281099149848e1),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t;
+    }
+
+    return result;
+}
+
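+
+// Quad-precision (113-bit) counterpart of the function above: the same interval
+// scheme, with higher-degree rational approximations to reach ~1e-35 accuracy.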
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 1) {
+        // Rational Approximation
+        // Maximum Relative Error: 4.7720e-37
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.33333333333333333333333333333333333333e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38519736580901276671338330967060054188e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.07012342772403725079487012557507575976e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70163612228825567572185033570526547856e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.16393313438726572630782132625753922397e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.92141312947853945617138019222992750592e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16513062047959961711747864068554379374e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08850391017085844154857927364247623649e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07060872491334153829857156707699441084e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.56961733740920438026573722084839596926e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.93626747947476815631021107726714283086e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32967164823609209711923411113824666288e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.23420723211833268177898025846064230665e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13807083548358335699029971528179486964e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00810772528427939684296334977783425582e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.24383652800043768524894854013745098654e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64696616559657052516796844068580626381e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.62288747679271039067363492752820355369e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19311779292286492714550084942827207241e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48436879303839576521077892946281025894e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.28665316157256311138787387605249076674e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.36350302380845433472593647100484547496e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.05835458213330488018147374864403662878e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.13919959493955187399856105325181806876e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30960533107704070411766556906543316310e-8),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.6297e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.84196970581015939887507434989936103587e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23864910443500344832158256856064580005e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.72066675347648126090497588433854314742e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81712740200456564860442639192891089515e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.39091197181834765859741334477680768031e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.03759464781707198959689175957603165395e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15298069568149410830642785868857309358e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18910514301176322829267019223946392192e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.16851691488007921400221017970691227149e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.82031940093536875619655849638573432722e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.30042143299959913519747484877532997335e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.19848671456872291336347012756651759817e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00479393063394570750334218362674723065e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24929390929112144560152115661603117364e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.34853762543033883106055186520573363290e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.73783624941936412984356492130276742707e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23224734370942016023173307854505597524e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.11116448823067697039703254343621931158e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.12490054037308798338231679733816982120e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.38701607014856856812627276285445001885e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10075199231657382435402462616587005087e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.43662615015322880941108094510531477066e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.37981396630189761210639158952200945512e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55820444854396304928946970937054949160e-8),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
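+    // Each subsequent branch doubles the interval width ([2,4), [4,8), ... [32,64)),
+    // re-centring on the lower endpoint via t = x - a to keep the argument small.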
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.8103e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07409273397524124098315500450332255837e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.98373054365213259465522536994638631699e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.30851284606709136235419547406278197945e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92686617543233900289721448026065555990e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.18056394312484073294780140350522772329e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07058343449035366484618967963264380933e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71636108080692802684712497501670425230e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13155853034615230731719317488499751231e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.00070273388376168880473457782396672044e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35528857373910910704625837069445190727e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.99897218751541535347315078577172104436e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35092090729912631973050415647154137571e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72220647682193638971237255396233171508e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.45008884108655511268690849420714428764e-15),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42652074703683973183213296310906006173e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03479786698331153607905223548719296572e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.95556520914240562719970700900964416000e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73127917601685318803655745157828471269e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.63007065833918179119250623000791647836e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.70652732923091039268400927316918354628e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60880782675229297981880241245777122866e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09979261868403910549978204036056659380e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12085610111710889118562321318284539217e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59811533082647193392924345081953134304e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.37211668706684650035086116219257276925e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62479830409039340826066305367893543134e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.22039803134898937546371285610102850458e-11),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.5930e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.70720199535228802537946089633331273434e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.85220706158476482443562698303252970927e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55090221054465759649629178911450010833e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.70398047783095186291450019612979853708e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11846661331973171721224034349719801691e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83195024406409870789088752469490824640e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.23908312140480103249294791529383548724e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.40765128885655152415228193255890859830e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.14294523267278070539100529759317560119e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.26815059429007745850376987481747435820e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.28142945635159623618312928455133399240e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77079683180868753715374495747422819326e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73710011278079325323578951018770847628e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.70140037580287364298206334732060874507e-16),
+        };
+        BOOST_MATH_STATIC const RealType Q[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36848014038411798213992770858203510748e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.15373052017549822413011375404872359177e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.92705544967513282963463451395766172671e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19899290805598090502434290420047460406e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74002906913724742582773116667380578990e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.80632456977494447641985312297971970632e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.53381530665983535467406445749348183915e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.86606180756179817016240556949228031340e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.49594666942152749850479792402078560469e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.25231012522695972983928740617341887334e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34987086926725472733984045599487947378e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.58286136970918021841189712851698747417e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.12238357666199366902936267515573231037e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82464168044335183356132979380360583444e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40073718480172265670072434562833527076e-17),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.3609e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74847564444513000450056174922427854591e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56842503159303803254436983444304764079e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.48504629497687889354406208309334148575e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62327083507366120871877936416427790391e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72062210557023828776202679230979309963e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19153025667221102770398900522196418041e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66248482185063262034022017727375829162e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57390218395059632327421809878050974588e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.45520328522839835737631604118833792570e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76327978880339919462910339138428389322e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.99700625463451418394515481232159889297e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82943476668680389338853032002472541164e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19415284760817575957617090798914089413e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17080879333540200065368097274334363537e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.09912208206107606750610288716869139753e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.98451733054622166748935243139556132704e-26),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08148065380582488495702136465010348576e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.42385352331252779422725444021027377277e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66510412535270623169792008730183916611e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.47952712144801508762945315513819636452e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.20703092334999244212988997416711617790e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.71658889250345012472529115544710926154e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.63905601023452497974798277091285373919e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76730409484335386334980429532443217982e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19139408077753398896224794522985050607e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.58025872548387600940275201648443410419e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.11369336267349152895272975096509109414e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.56182954522937999103610817174373785571e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.35452907177197742692545044913125982311e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23587924912460218189929226092439805175e-17),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.7192e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.22684103170563193014558918295924551173e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55222688816852408105912768186300290291e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.60747505331765587662432023547517953629e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.80463770266821887100086895337451846880e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.19190824169154471000496746227725070963e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40646301571395681364881852739555404287e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15408836734496798091749932018121879724e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.13676779930022341958128426888835497781e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02435098103190516418351075792372986932e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.82018920071479061978244972592746216377e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.26435061215428679536159320644587957335e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.05298407883178633891153989998582851270e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.61156860101928352010449210760843428372e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.02156808288545876198121127510075217184e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65549196385656698597261688277898043367e-30),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.03426141030409708635168766288764563749e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13808987755928828118915442251025992769e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52253239792170999949444502938290297674e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33720936468171204432499390745432338841e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.08713980159883984886576124842631646880e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.43652846144339754840998823540656399165e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02849693617024492825330133490278326951e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14110017452008167954262319462808192536e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.01462578814695350559338360744897649915e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73495817568046489613308117490508832084e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47445372925844096612021093857581987132e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08200002287534174751275097848899176785e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15305756373406702253187385797525419287e-21),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.7799e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.20357145727036120652264700679701054983e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95712324967981162396595365933255312698e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08619492652809635942960438372427086939e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37140224583881547818087260161723208444e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83073777522092069988595553041062506001e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.00473542739040742110568810201412321512e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.47447289601822506789553624164171452120e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70913574957198131397471307249294758738e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36538119628489354953085829178695645929e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00763343664814257170332492241110173166e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62297585950798764290583627210836077239e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15780217054514513493147192853488153246e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31961589164397397724611386366339562789e-28),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.26440207646105117747875545474828367516e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27872879091838733280518786463281413334e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34256572873114675776148923422025029494e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13595637397535037957995766856628205747e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33745879863685053883024090247009549434e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41792226523670940279016788831933559977e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.03966147662273388060545199475024100492e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62177951640260313354050335795080248910e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50650165210517365082118441264513277196e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.48413283257020741389298806290302772976e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16439276222123152748426700489921412654e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24969602890963356175782126478237865639e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.08681155203261739689727004641345513984e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.28282024196484688479115133027874255367e-30),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        RealType x_cube = x * x * x;
+        RealType t = (boost::math::isnormal)(x_cube) ? 1 / sqrt(x_cube) : 1 / pow(sqrt(x), 3);
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.5865e-37
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[8] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98942280401432677939946059934381868476e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12426566605292130233061857505057433291e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.91574528280329492283287073127040983832e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69914217884224943794012165979483573091e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30178902028403564086640591437738216288e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96515490341559353794378324810127583810e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44343825578434751356083230369361399507e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.07224810408790092272497403739984510394e2),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.32474438135610721926278423612948794250e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27594461167587027771303292526448542806e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.49207539478843628626934249487055017677e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.75094412095634602055738687636893575929e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.51642534474780515366628648516673270623e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05977615003758056284424652420774587813e4),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t;
+    }
+
+    return result;
+}
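+
+// Left tail, P(X <= x) for x <= 0, at double precision. Down to x = -2 plain
+// rational approximations in x suffice; further out the exponentially small
+// factor s(x) is peeled off first (see the comment inside the else branch).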
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x >= -1) {
+        RealType t = x + 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.6964e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(4.23238998449671083670e-1),
+            static_cast<RealType>(4.95353582976475183891e-1),
+            static_cast<RealType>(2.45823281826037784270e-1),
+            static_cast<RealType>(7.29726507468813920788e-2),
+            static_cast<RealType>(1.63332856186819713346e-2),
+            static_cast<RealType>(2.82514634871307516142e-3),
+            static_cast<RealType>(2.66220579589280704089e-4),
+            static_cast<RealType>(3.09442180091323751049e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.16241922223786900600e-1),
+            static_cast<RealType>(2.75690727171711638879e-1),
+            static_cast<RealType>(7.18707184893542884080e-2),
+            static_cast<RealType>(1.87136800286819336797e-2),
+            static_cast<RealType>(2.38383441176345054929e-3),
+            static_cast<RealType>(3.23509126477812051983e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x >= -2) {
+        RealType t = x + 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.8303e-17
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(1.62598955251978523175e-1),
+            static_cast<RealType>(2.30154661502402196205e-1),
+            static_cast<RealType>(1.29233975368291684522e-1),
+            static_cast<RealType>(3.80919553916980965587e-2),
+            static_cast<RealType>(8.17724414618808505948e-3),
+            static_cast<RealType>(1.95816800210481122544e-3),
+            static_cast<RealType>(3.35259917978421935141e-4),
+            static_cast<RealType>(1.22071311320012805777e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(9.63771793313770952352e-2),
+            static_cast<RealType>(2.23602260938227310054e-1),
+            static_cast<RealType>(9.21944797677283179038e-3),
+            static_cast<RealType>(1.82181136341939651516e-2),
+            static_cast<RealType>(1.11216849284965970458e-4),
+            static_cast<RealType>(5.57446347676836375810e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
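+        // Factor out the exponential decay of the left tail: the CDF is written as
+        // s(x) * R(t) with s(x) = exp(2x^3/27) / sqrt(-x^3), so the rational part R
+        // only has to model a slowly varying correction.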
+        RealType s = exp(2 * x * x * x / 27) / sqrt(-x * x * x);
+
+        if (x >= -4) {
+            RealType t = -x - 2;
+
+            // Rational Approximation
+            // Maximum Relative Error: 3.6017e-18
+            BOOST_MATH_STATIC const RealType P[8] = {
+                static_cast<RealType>(8.31806744221966404520e-1),
+                static_cast<RealType>(1.34481067378012055850e0),
+                static_cast<RealType>(9.12139427469494995264e-1),
+                static_cast<RealType>(3.59706159222491124928e-1),
+                static_cast<RealType>(9.48836332725688279299e-2),
+                static_cast<RealType>(1.68259594978853951234e-2),
+                static_cast<RealType>(1.89700733471520162946e-3),
+                static_cast<RealType>(1.13854052826846329787e-4),
+            };
+            BOOST_MATH_STATIC const RealType Q[8] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(1.29694286517571741097e0),
+                static_cast<RealType>(7.99686735441213882518e-1),
+                static_cast<RealType>(3.08198207583883597188e-1),
+                static_cast<RealType>(7.97230139795658588972e-2),
+                static_cast<RealType>(1.40742142048849462162e-2),
+                static_cast<RealType>(1.58411440546277691506e-3),
+                static_cast<RealType>(9.51560785730564046338e-5),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -8) {
+            RealType t = -x - 4;
+
+            // Rational Approximation
+            // Maximum Relative Error: 1.3504e-17
+            BOOST_MATH_STATIC const RealType P[8] = {
+                static_cast<RealType>(1.10294551528734705946e0),
+                static_cast<RealType>(1.26696377028973554615e0),
+                static_cast<RealType>(6.63115985833429688941e-1),
+                static_cast<RealType>(2.06289793717379095832e-1),
+                static_cast<RealType>(4.11977615717846276227e-2),
+                static_cast<RealType>(5.28620928618550859827e-3),
+                static_cast<RealType>(4.04328442334023561279e-4),
+                static_cast<RealType>(1.42364413902075896503e-5),
+            };
+            BOOST_MATH_STATIC const RealType Q[8] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(1.09709853682665798542e0),
+                static_cast<RealType>(5.63687797989627787500e-1),
+                static_cast<RealType>(1.73604358560002859604e-1),
+                static_cast<RealType>(3.44985744385890794044e-2),
+                static_cast<RealType>(4.41683993064797272821e-3),
+                static_cast<RealType>(3.37834206192286709492e-4),
+                static_cast<RealType>(1.18951465786445720729e-5),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -16) {
+            RealType t = -x - 8;
+
+            // Rational Approximation
+            // Maximum Relative Error: 8.8272e-18
+            BOOST_MATH_STATIC const RealType P[7] = {
+                static_cast<RealType>(1.18246847255744057280e0),
+                static_cast<RealType>(8.41320657699741240497e-1),
+                static_cast<RealType>(2.55093097377551881478e-1),
+                static_cast<RealType>(4.21261576802732715976e-2),
+                static_cast<RealType>(3.98805044659990523312e-3),
+                static_cast<RealType>(2.04688276265993954527e-4),
+                static_cast<RealType>(4.43354791268634655473e-6),
+            };
+            BOOST_MATH_STATIC const RealType Q[7] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(7.07103973315808077783e-1),
+                static_cast<RealType>(2.13664682181055450396e-1),
+                static_cast<RealType>(3.52218225168465984709e-2),
+                static_cast<RealType>(3.33218664347896435919e-3),
+                static_cast<RealType>(1.71025807471868853268e-4),
+                static_cast<RealType>(3.70441884597642042665e-6),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -32) {
+            RealType t = -x - 16;
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.6236e-18
+            BOOST_MATH_STATIC const RealType P[6] = {
+                static_cast<RealType>(1.19497306481411168356e0),
+                static_cast<RealType>(3.90497195765498241356e-1),
+                static_cast<RealType>(5.13120330037626853257e-2),
+                static_cast<RealType>(3.38574023921119491471e-3),
+                static_cast<RealType>(1.12075935888344736993e-4),
+                static_cast<RealType>(1.48743616420183584738e-6),
+            };
+            BOOST_MATH_STATIC const RealType Q[6] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(3.26493785348088598123e-1),
+                static_cast<RealType>(4.28813205161574223713e-2),
+                static_cast<RealType>(2.82893073845390254969e-3),
+                static_cast<RealType>(9.36442365966638579335e-5),
+                static_cast<RealType>(1.24281651532469125315e-6),
+            };
+
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else {
+            result = 0;
+        }
+    }
+
+    return result;
+}
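+
+// Quad-precision (113-bit) variant of the left-tail CDF; the tail is carried one
+// block further (to x = -64) before s(x) underflows and the result is taken as 0.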
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_minus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x >= -1) {
+        RealType t = x + 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.0688e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.23238998449671083670041452413316011920e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.14900991369455846775267187236501987891e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.19132787054572299485638029221977944555e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87295743700300806662745209398368996653e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.41994520703802035725356673887766112213e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78782099629586443747968633412271291734e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.05200546520666366552864974572901349343e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.51453477916196630939702866688348310208e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15461354910584918402088506199099270742e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43371674256124419899137414410592359185e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35849788347057186916350200082990102088e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.50359296597872967493549820191745700442e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21838020977580479741299141050400953125e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.46723648594704078875476888175530463986e-12),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.98700317671474659677458220091101276158e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.00405631175818416028878082789095587658e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04189939150805562128632256692765842568e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.03621065280443734565418469521814125946e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85722257874304617269018116436650330070e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.24191409213079401989695901900760076094e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.64269032641964601932953114106294883156e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19289631274036494326058240677240511431e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41389309719775603006897751176159931569e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42000309062533491486426399210996541477e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.02436961569668743353755318268149636644e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.50130875023154569442119099173406269991e-9),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x >= -2) {
+        RealType t = x + 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.1815e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62598955251978523174755901843430986522e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08127698872954954678270473317137288772e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70144997468767751317246482211703706086e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49486603823046766249106014234315835102e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.07186495389828596786579668258622667573e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98334953533562948674335281457057445421e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.44119017374895211020429143034854620303e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27080759819117162456137826659721634882e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53892796920597912362370019918933112349e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30530442651657077016130554430933607143e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.04837779538527662990102489150650534390e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.94354615171320374997141684442120888127e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.30746545799073289786965697800049892311e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41870129065056783732691371215602982173e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.75919235734607601884356783586727272494e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.57656678936617227532275100649989944452e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72617552401870454676736869003112018648e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.59238104942208254162102314312757621047e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06040513359343987972917295603514840777e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.26922840063034349024167652148593396307e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25628506630180107357627955876231943531e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81600387497542714853225329159728694926e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08210973846891324886779444820838563800e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.68632477858150229792523037059221563861e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.43542789104866782087701759971538600076e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70594730517167328271953424328890849790e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30162314557860623869079601905904538470e-9),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        RealType s = exp(2 * x * x * x / 27) / sqrt(-x * x * x);
+
+        if (x >= -4) {
+            RealType t = -x - 2;
+
+            // Rational Approximation
+            // Maximum Relative Error: 6.4678e-36
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[16] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.31806744221966404520449104514474066823e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50292887071777664663197915067642779665e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.45140067157601150721516139901304901854e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93227973605511286112712730820664209900e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74259108933048973391560053531348126900e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.77677252890665602191818487592154553094e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71843197238558832510595724454548089268e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.62811778285151415483649897138119310816e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74127763877120261698596916683136227034e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.24832552591462216226478550702845438540e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.93381036027487259940171548523889481080e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02261328789519398745019578211081412570e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.75409238451885381267277435341417474231e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.09526311389365895099871581844304449319e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96371756262605118060185816854433322493e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.88472819535099746216179119978362211227e-10),
+            };
+            BOOST_MATH_STATIC const RealType Q[16] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.68923525157720774962908922391133419863e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40714902096062779527207435671907059131e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73120596883364361220343183559076165363e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56331267512666685349409906638266569733e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.80956276267438042306216894159447642323e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34213468750936211385520570062547991332e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.50081600968590616549654807511166706919e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47297208240850928379158677132220746750e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.73392976579560287571141938466202325901e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13858821123741335782695407397784840241e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.04103497389656828224053882850778186433e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.81040189127998139689091455192659191796e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42176283104790992634826374270801565123e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64077137545614380065714904794220228239e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08139724991616322332901357866680220241e-10),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -8) {
+            RealType t = -x - 4;
+
+            // Rational Approximation
+            // Maximum Relative Error: 3.5975e-36
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[16] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10294551528734705945662709421382590676e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.26135857114883288617323671166863478751e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23504465936865651893193560109437792738e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41598983788870071301270649341678962009e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.43871304031224174103636568402522086316e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22745720458050596514499383658714367529e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.05976431838299244997805790000128175545e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32087500190238014890030606301748111874e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32754445514451500968404092049839985196e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31866016448921762610690552586049011375e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.80197257671079297305525087998125408939e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.44212088947602969374978384512149432847e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.38857170416924025226203571589937286465e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20239999218390467567339789443070294182e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.93965060142992479149039624149602039394e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36407983918582149239548869529460234702e-12),
+            };
+            BOOST_MATH_STATIC const RealType Q[16] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99867960957804580209868321228347067213e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.94236623527818880544030470097296139679e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21644866845440678050425616384656052588e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.48653919287388803523090727546630060490e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88696531788490258477870877792341909659e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.11157356307921406032115084386689196255e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11071149696069503480091810333521267753e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95274844731679437274609760294652465905e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.77971280158253322431071249000491659536e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.18092258028773913076132483326275839950e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87773088535057996947643657676843842076e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99609277781492599950063871899582711550e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00465660598924300723542908245498229301e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.29174652982710100418474261697035968379e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.31746082236506935340972706820707017875e-12),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -16) {
+            RealType t = -x - 8;
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.6792e-35
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[15] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18246847255744057280356900905660312795e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77955886026107125189834586992142580148e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24948425302263641813107623611637262126e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.42593662659560333324287312162818766556e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62714138002904073145045478360748042164e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.56008984285541289474850396553042124777e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84858048549330525583286373950733005244e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.30578460156038467943968005946143934751e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.93974868941529258700281962314167648967e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.95086664204515648622431580749060079100e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57811968176644056830002158465591081929e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27814751838906948007289825582251221538e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06762893426725920159998333647896590440e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15388861641344998301210173677051088515e-12),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83956842740198388242245209024484381888e-29),
+            };
+            BOOST_MATH_STATIC const RealType Q[14] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50056124032615852703112365430040751173e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05112559537845833793684655693572118348e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.55609497026127521043140534271852131858e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36430652394614121156238070755223942728e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98167215021940993097697777547641188697e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.89418831310297071347013983522734394061e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.10980717618462843498917526227524790487e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.80119618735773019675212434416594954984e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13748676086657187580746476165248613583e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15424395860921826755718081823964568760e-7),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.75228896859720124469916340725146705309e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72759238269315282789451836388878919387e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79966603543593799412565926418879689461e-12),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -32) {
+            RealType t = -x - 16;
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.1744e-36
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[12] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19497306481411168355692832231058399132e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.15593166681833539521403250736661720488e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54020260738207743315755235213180652303e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.76467972857585566189917087631621063058e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.97922795572348613358915532172847895070e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.26998967192207380100354278434037095729e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.32827180395699855050424881575240362199e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50587178182571637802022891868380669565e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.78252548290929962236994183546354358888e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01519297007773622283120166415145520855e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29602226691665918537895803270497291716e-11),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.53666531487585211574942518181922132884e-14),
+            };
+            BOOST_MATH_STATIC const RealType Q[12] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.82230649578130958108098853863277631065e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12412482738973738235656376802445565005e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98320116955422615960870363549721494683e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.99756654189000467678223166815845628725e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40414942475279981724792023159180203408e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78118445466942812088955228016254912391e-5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25827002637577602812624580692342616301e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.99604467789028963216078448884632489822e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.48237134334492289420105516726562561260e-10),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08288201960155447241423587030002372229e-11),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.29720612489952110448407063201146274502e-14),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else if (x >= -64) {
+            RealType t = -x - 32;
+
+            // Rational Approximation
+            // Maximum Relative Error: 3.4699e-36
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[10] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19659414007358083585943280640656311534e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36969140730640253987817932335415532846e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21946928005759888612066397569236165853e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08341720579009422518863704766395201498e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44908614491286780138818989614277172709e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.54172482866925057749338312942859761961e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.49281630950104861570255344237175124548e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27586759709416364899010676712546639820e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00054716479138657682306851175059678989e-11),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.48798342894235412426464893852098239746e-14),
+            };
+            BOOST_MATH_STATIC const RealType Q[10] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.81588658109851219975949691772676519853e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.52583331848892383968186924120872369151e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57644670426430994363913234422346706991e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21080164634428298820141591419770346977e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79484074949580980980061103238709314326e-6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.93167250146504946763386377338487557826e-8),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06604193118724797924138056151582242604e-9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.35999937789324222934257460080153249173e-12),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.91435929481043135336094426837156247599e-14),
+            };
+            // LCOV_EXCL_STOP
+            result = s * tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+        }
+        else {
+            result = 0;
+        }
+    }
+
+    return result;
+}
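+
+// CDF dispatch: each half-line is evaluated by the tail expansion that is natural
+// there, and the opposite case is formed as 1 minus it, keeping the subtraction
+// away from the far tails where it would cancel catastrophically.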
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_imp_prec(const RealType& x, bool complement, const boost::math::integral_constant<int, 53>& tag) {
+    if (x >= 0) {
+        return complement ? mapairy_cdf_plus_imp_prec<RealType>(x, tag) : 1 - mapairy_cdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return complement ? 1 - mapairy_cdf_minus_imp_prec<RealType>(x, tag) : mapairy_cdf_minus_imp_prec<RealType>(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_imp_prec(const RealType& x, bool complement, const boost::math::integral_constant<int, 113>& tag) {
+    if (x >= 0) {
+        return complement ? mapairy_cdf_plus_imp_prec<RealType>(x, tag) : 1 - mapairy_cdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return complement ? 1 - mapairy_cdf_minus_imp_prec<RealType>(x, tag) : mapairy_cdf_minus_imp_prec<RealType>(x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_cdf_imp(const mapairy_distribution<RealType, Policy>& dist, const RealType& x, bool complement) {
+    //
+    // This calculates the cdf of the Map-Airy distribution and/or its complement.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::cdf(mapairy<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (i.e. 128-bit quad-floats)");
+
+    RealType u = (x - location) / scale;
+
+    result = mapairy_cdf_imp_prec(u, complement, tag_type());
+
+    return result;
+}
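+
+// Lower-tail quantile at double precision. Moderate p uses rational fits on
+// [0.125, 0.5); for small p the argument is reduced dyadically, branching on
+// ilogb(p) and mapping each block onto a bounded t via -log2(ldexp(p, k)).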
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_lower_imp_prec(const RealType& p, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.375) {
+        RealType t = p - static_cast<RealType>(0.375);
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.5488e-18
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(-1.17326074020471664075e0),
+            static_cast<RealType>(1.51461298154568349598e0),
+            static_cast<RealType>(1.19979368094343490487e1),
+            static_cast<RealType>(-5.94882121521324108164e0),
+            static_cast<RealType>(-2.20619749774447254528e1),
+            static_cast<RealType>(7.17766543775229176131e0),
+            static_cast<RealType>(4.79284243496552841508e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.76268072706610602584e0),
+            static_cast<RealType>(-4.88492535243404839734e0),
+            static_cast<RealType>(-5.67524172432687656881e0),
+            static_cast<RealType>(6.83327389947131710596e0),
+            static_cast<RealType>(2.91338085774159042709e0),
+            static_cast<RealType>(-1.41108918944159283950e0),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - static_cast<RealType>(0.25);
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.5181e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(-1.63281240925531302762e0),
+            static_cast<RealType>(-4.92351310795930780147e0),
+            static_cast<RealType>(1.43448529253101759409e1),
+            static_cast<RealType>(3.33182629948094299473e1),
+            static_cast<RealType>(-3.06679026539368582747e1),
+            static_cast<RealType>(-2.87298447423841965301e1),
+            static_cast<RealType>(1.31575930750093554120e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[6] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.38761652244702318296e0),
+            static_cast<RealType>(2.40932080746189543284e0),
+            static_cast<RealType>(-1.69465870062123632126e1),
+            static_cast<RealType>(-6.39998944283654848809e0),
+            static_cast<RealType>(1.27168434054332272391e1),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - static_cast<RealType>(0.125);
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.3028e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(-2.18765177572396469657e0),
+            static_cast<RealType>(-3.65752788934974426531e1),
+            static_cast<RealType>(-1.81144810822028903904e2),
+            static_cast<RealType>(-1.22434531262312950288e2),
+            static_cast<RealType>(8.99451018491165823831e2),
+            static_cast<RealType>(9.11333307522308410858e2),
+            static_cast<RealType>(-8.76285742384616909177e2),
+            static_cast<RealType>(-2.33786726970025938837e2),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.91797638291395345792e1),
+            static_cast<RealType>(1.24293724082506952768e2),
+            static_cast<RealType>(2.82393116012902543276e2),
+            static_cast<RealType>(-1.80472369158936285558e1),
+            static_cast<RealType>(-5.31764390192922827093e2),
+            static_cast<RealType>(-5.60586018315854885788e1),
+            static_cast<RealType>(1.21284324755968033098e2),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
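+    // For p < 1/8 each branch handles one dyadic block of p: ldexp(p, k) rescales
+    // the block so that t = -log2(ldexp(p, k)) stays bounded, and the blocks widen
+    // geometrically (ilogb thresholds -4, -8, -16, ..., -1024).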
static_cast(1.), + static_cast(1.77270673840643360017e-1), + static_cast(1.18099604045834575786e-2), + static_cast(3.66889581757166584963e-4), + static_cast(5.34484782554469770841e-6), + static_cast(3.19694601727035291809e-8), + static_cast(5.24649233511937214948e-11), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(p) >= -64) { + RealType t = -log2(ldexp(p, 32)); + + // Rational Approximation + // Maximum Relative Error: 2.7742e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(-6.41443550638291133784e0), + static_cast(-6.38369359780748328332e-1), + static_cast(-2.43420704406734621618e-2), + static_cast(-4.45274771094277987075e-4), + static_cast(-3.99529078051262843241e-6), + static_cast(-1.59758677464731620413e-8), + static_cast(-2.14338367751477432622e-11), + static_cast(-3.23343844538964435927e-15), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(8.79845511272943785289e-2), + static_cast(2.90839059356197474893e-3), + static_cast(4.48172838083912540123e-5), + static_cast(3.23770691025690100895e-7), + static_cast(9.60156044379859908674e-10), + static_cast(7.81134095049301988435e-13), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(p) >= -128) { + RealType t = -log2(ldexp(p, 64)); + + // Rational Approximation + // Maximum Relative Error: 3.2451e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(-8.23500806363233610938e0), + static_cast(-4.05652655284908839003e-1), + static_cast(-7.65978833819859622912e-3), + static_cast(-6.94194676058731901672e-5), + static_cast(-3.08771646223818451436e-7), + static_cast(-6.12443207313641110962e-10), + static_cast(-4.07882839359528825925e-13), + static_cast(-3.05720104049292610799e-17), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(4.37395212065018405474e-2), + static_cast(7.18654254114820140590e-4), + static_cast(5.50371158026951899491e-6), + static_cast(1.97583864365011234715e-8), + static_cast(2.91169706068202431036e-11), + static_cast(1.17716830382540977039e-14), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(p) >= -256) { + RealType t = -log2(ldexp(p, 128)); + + // Rational Approximation + // Maximum Relative Error: 3.8732e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(-1.04845570631944023913e1), + static_cast(-2.56502856165700644836e-1), + static_cast(-2.40615394566347412600e-3), + static_cast(-1.08364601171893250764e-5), + static_cast(-2.39603255140022514289e-8), + static_cast(-2.36344017673944676435e-11), + static_cast(-7.83146284114485675414e-15), + static_cast(-2.92218240202835807955e-19), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(2.17740414929742679904e-2), + static_cast(1.78084231709097280884e-4), + static_cast(6.78870668961146609668e-7), + static_cast(1.21313439060489363960e-9), + static_cast(8.89917934953781122884e-13), + static_cast(1.79115540847944524599e-16), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(p) >= -512) { + RealType t = -log2(ldexp(p, 256)); + + // Rational Approximation + // Maximum Relative Error: 4.6946e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(-1.32865827226175698181e1), + static_cast(-1.61802434199627472010e-1), + static_cast(-7.55642602577784211259e-4), + static_cast(-1.69457608092375302291e-6), + 
static_cast(-1.86612389867293722402e-9), + static_cast(-9.17015770142364635163e-13), + static_cast(-1.51422473889348610974e-16), + static_cast(-2.81661279271583206526e-21), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.08518414679241420227e-2), + static_cast(4.42335224797004486239e-5), + static_cast(8.40387821972524402121e-8), + static_cast(7.48486746424527560620e-11), + static_cast(2.73676810622938942041e-14), + static_cast(2.74588200481263214866e-18), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (ilogb(p) >= -1024) { + RealType t = -log2(ldexp(p, 512)); + + // Rational Approximation + // Maximum Relative Error: 5.7586e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(-1.67937186583822375593e1), + static_cast(-1.01958138247797604098e-1), + static_cast(-2.37409774265951876695e-4), + static_cast(-2.65483321307104128810e-7), + static_cast(-1.45803536947907216594e-10), + static_cast(-3.57375116523338994342e-14), + static_cast(-2.94401318006358820268e-18), + static_cast(-2.73260616170245224789e-23), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(5.41357843707822974161e-3), + static_cast(1.10082540037527566536e-5), + static_cast(1.04338126042963003178e-8), + static_cast(4.63619608458569600346e-12), + static_cast(8.45781310395535984099e-16), + static_cast(4.23432554226506409568e-20), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + result = -boost::math::numeric_limits::infinity(); + } + + return result; +} + + +template +BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_lower_imp_prec(const RealType& p, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (p >= 0.4375) { + RealType t = p - static_cast (0.4375); + + // Rational Approximation + // Maximum Relative Error: 4.2901e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[10] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.48344198262277235851026749871350753173e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.18249834490570496537675012473572546187e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20191368895639224466285643454767208281e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.88388953372157636908236843798588258539e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.59477796311326067051769635858472572709e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88799146700484120781026039104654730797e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.15708831983930955608517858269193800412e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.01389336086567891484877690859385409842e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.16683694881010716925933071465043323946e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.04356966421177683585461937085598186805e1), + }; + BOOST_MATH_STATIC const RealType Q[11] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.75444066345435020043849341970820565274e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.95105673975812427406540024601734210826e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.20381124524894051002242766595737443257e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.48370658634610329590305283520183480026e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.52213602242009530270284305006282822794e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.91028722773916006242187843372209197705e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
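+// Editor's note, not part of the original patch: every tail branch above
+// substitutes t = -log2(ldexp(p, k)) = -(log2(p) + k), mapping the interval
+// 2^-2k <= p < 2^-k onto a bounded range of roughly [0, k] on which the
+// rational approximation is fitted. For example, p = 2^-12 falls in the
+// ilogb(p) >= -16 branch (k = 8) and is evaluated at t = 4.
+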
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_lower_imp_prec(const RealType& p, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.4375) {
+        RealType t = p - static_cast<RealType>(0.4375);
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.2901e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[10] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.48344198262277235851026749871350753173e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.18249834490570496537675012473572546187e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20191368895639224466285643454767208281e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.88388953372157636908236843798588258539e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.59477796311326067051769635858472572709e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88799146700484120781026039104654730797e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.15708831983930955608517858269193800412e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.01389336086567891484877690859385409842e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.16683694881010716925933071465043323946e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.04356966421177683585461937085598186805e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.75444066345435020043849341970820565274e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.95105673975812427406540024601734210826e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.20381124524894051002242766595737443257e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.48370658634610329590305283520183480026e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.52213602242009530270284305006282822794e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.91028722773916006242187843372209197705e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.76130245344411748356977700519731978720e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30834721900169773543149860814908904224e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.37863084758381651884340710544840951679e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.46880981703613838666108664771931239970e0),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.375) {
+        RealType t = p - static_cast<RealType>(0.375);
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.8433e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.17326074020471664204142312429732771661e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.23412560010002723970559712941124583385e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83665111310407767293290698145068379130e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.38459476870110655357485107373883403534e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.28751995328228442619291346921055105808e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.31663592034507247231393516167247241037e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13629333446941271397790762651183997586e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.80674058829101054663235662701823250421e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.53226182094253065852552393446365315319e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.14713948941614711932063053969010219677e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.62979741122708118776725634304028246971e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[10] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.10550060286464202595779024353437346419e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.15893254630199957990897452211066782021e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.58964066823516762861256609311733069353e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.73352515261971291505497909338586980605e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.64737859211974163695241658186141083513e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.79137714768236053008878088337762178011e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.71851514659301019977259792564627124877e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.37210093190088984630526671624779422232e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06793750951779308425209267821815264457e1),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - static_cast<RealType>(0.25);
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.9072e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.63281240925531315038207673147576291783e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.72733898245766165408685147762489513406e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.48666841594842113608962500631836790675e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.38711336213357101067420572773139678571e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.19536066931882831915715343914510496760e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70911330354860558400876197129777829223e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.46138758869321272507090399082047865434e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.42653825421465476333482312795245170700e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68040069633027903153088221686431049116e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.63017854949929226947577854802720988740e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.57362168966659376351959631576588023516e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.48386631313725080746815524770260451090e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.03293129698111279047104766073456412318e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29511778027351594854005887702013466376e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.66155745848864270109281703659789474448e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.25628362783798417463294553777015370203e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.93162726153946899828589402901015679821e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.51582398149308841534372162372276623400e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55512400116480727630652657714109740448e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.11949742749256615588470329024257669470e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.28090154738508864776480712360731968283e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.44307717248171941824014971579691790721e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.33595130666758203099507440236958725924e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.76156378002782668186592725145930755636e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.70446647862725980215630194019740606935e0),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - static_cast<RealType>(0.125);
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.9092e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.18765177572396470161180571018467019660e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.16991481696416567893311341049825218287e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.25497491118598918048058751362064598010e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.00259915194411316966036757165146681474e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.20350803730836873687692010728689867756e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.75441278117456011071671644613599089820e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18844967505497310645091822621081741562e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.84771867850847121528386231811667556346e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.78112436422992766542256241612018834150e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.82617957794395420193751983521804760378e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.77260227244465268981198741998181334875e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.61918290776044518321561351472048170874e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.10309368217936941851272359946015001037e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.68917274690585744147547352309416731690e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.96914697182030973966321601422851730384e4),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98063872144867195074924232601423646991e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.65911346382127464683324945513128779971e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02451223307009464199634546540152067898e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.12662676019712475980273334769644047369e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.77637305574675655673572303462430608857e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.65900204382557635710258095712789133767e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.61649498173261886264315880770449636676e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.29867325788870863753779283018061152414e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.31375646045453788071216808289409712455e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.91053361331987954531162452163243245571e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.30917504462260061766689326034981496723e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.95779171217851232246427282884386844906e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.07234815204245866330282860014624832711e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21353269292094971546479026200435095695e4),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.9653e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.18765177572396470161180571018467025793e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.94718878144788678915739777385667044494e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.01760622104142726407095836139719210570e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.27585388152893587017559610649258643106e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.26494992809545184138230791849722703452e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.19962820888633928632710264415572027960e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.10249328404135065767844288193594496173e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.94733898567966142295343935527193851633e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.10350856810280579594619259121755788797e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.23852908701349250480831167491889740823e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.61008160195204632725691076288641221707e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[11] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59109221235949005113322202980300291082e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07119192591092503378838510797916225920e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97313678065269932508447079892684333156e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.85743007214453288049750256975889151838e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21504290861269099964963866156493713716e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00880286431998922077891394903879512720e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.78806057460269900288838437267359072282e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15390284861815831078443996558014864171e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09877166004503937701692216421704042881e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.01544823753120969225271131241177003165e-11),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.7872e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.59822399410385083283727681965013517187e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.30676107401101401386206170152508285083e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.14999782004905950712290914501961213222e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.33941786334132569296061539102765384372e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.67220197146826865151515598496049341734e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.80553562310354708419148501358813792297e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.69365728863553037992854715314245847166e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.66571611322125393586164383361858996769e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.60112572827002965346926427208336150737e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.82368436189138780270310776927920829805e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.53112953085778860983110669544602606343e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.47363651755817041383574210879856850108e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.56959356177318833325064543662295824581e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.16259632533790175212174199386945953139e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.51102540845397821195190063256442894688e-18),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51733661576324699382035973518172469602e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01435607980568082538278883569729476204e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.93274478117803447229185270863587786287e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.79695020433868416640781960667235896490e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64179255575983014759473815411232853821e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88411076775875459504324039642698874213e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47924359965537384942568979646011627522e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.80915944234873904741224397674033111178e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67530059257263142305079790717032648103e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.59778634436027309520742387952911132163e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.45772841855129835242992919296397034883e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40356170422999016176996652783329671363e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.31127037096552892520323998665757802862e-16),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 7.6679e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.67354365380697578246790709817724831418e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.98681625368564496421038758088088788795e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.07310677200866436332040539417232353673e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.24429836496456823308103613923194387860e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.02985399190977938381625172095575017346e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.76782196972948240235456454778537838123e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.74449002785643398012450514191731166637e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.59599894044461264694825875303563328822e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.25326249969126313897827328136779597159e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.41714701672521323699602179629851858792e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.43197925999694667433180053594831915164e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.84385683045691486021670951412023644809e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.34484740544060627138216383389282372695e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.69717198111130468014375331439613690658e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.54464319402577486444841981479085908190e-22),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.56568169258142086426383908572004868200e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52144703581828715720555168887427064424e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.87268567039210776554113754014224005739e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.04907480436107533324385788289629535047e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.05063218951887755000006493061952858632e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.88707869862323507236241669797692957827e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12876335369047582931728838551780783006e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.96657547014655679104867167083078285517e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.05799908632250375607393338998205481867e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.26543514080500125624753383852230043206e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02102501711063497529014782040893679505e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.91577723105757841509716090936343311518e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.13824283239122976911704652025193325941e-20),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.6323e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.92187819510636694694450607724165689649e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.97763858433958798529675258052376253402e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.51315096979196319830162238831654165509e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.63462872759639470195664268077372442947e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.42843455093529447002457295994721102683e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.09839699044798405685726233251577671229e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.42918585783783247934440868680748693033e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.39927851612709063686969934343256912856e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.08811978806833318962489621493456773153e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.05790315329766847040100971840989677130e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.45479905512618918078640786598987515012e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.26725031195260767541308143946590024995e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.93921283405116349017396651678347306610e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.46989755992471397407520449698676945629e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.84098833615882764168840211033822541979e-26),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76933186134913044021577076301874622292e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.25794095712489484166470336696962749356e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.02367257449622569623375613790000874499e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.72422948147678152291655497515112236849e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54841786738844966378222670550160421679e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40690766057741905625149753730480294357e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.57722740290698689456097239435447030950e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12202417878861628530322715231540006386e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.52053248659561670052645118655279630611e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.93156242508301535729374373870786335203e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40767922796889118151219837068812449420e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83472702205100162081157644960354192597e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.07575352729625387198150555665307193572e-24),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -64) {
+        RealType t = -log2(ldexp(p, 32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.8799e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.41443550638291131009585191506467028820e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.27887396494095561461365753577189254063e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.12752633002089885479040650194288302309e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.79073355729340068320968150408320521772e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.92104558368762745368896313096467732214e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.31521786147036353766882145733055166296e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.69209485282228501578601478546441260206e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.17440286764020598209076590905417295956e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.24323354132100896221825450145208350291e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.53395893728239001663242998169841168859e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.68737623513761169307963299679882178852e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.34293243065056704017609034428511365032e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.16077944943653158589001897848048630079e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.62195354664281744711336666974567406606e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.34400562353945663460416286570988365992e-30),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87838171063584994806998206766890809367e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55395225505766120991458653457272783334e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.45282436421694718640472363162421055686e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29570159526914149727970023744213510609e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.75502365627517415214497786524319814617e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.74140267916464056408693097635546173776e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.53554952415602030474322188152855226456e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.25818606585833092910042975933757268581e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79945579370700383986587672831350689541e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.64188969985517050219881299229805701044e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.59811800080967790439078895802795103852e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.28225402720788909349967839966304553864e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.22313199865592821923485268860178384308e-28),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -128) {
+        RealType t = -log2(ldexp(p, 64));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.1548e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.23500806363233607692361021471929016922e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.16747938711332885952564885548768072606e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.58252672740225210061031286151136185818e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.15477223656783400505701577048140375949e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.51130817987857130928725357067032472382e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.68955185187558206711296837951129048907e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.30418343536824247801373327173028702308e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.96029044368524575193330776540736319950e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.53259926294848786440686413056632823519e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.09476918795964320022985872737468492126e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.20864569882477440934325776445799604204e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.01321468510611172635388570487951907905e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.45079221107904118975166347269173516170e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.36559452694774774399884349254686988041e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.92396730553947142987611521115040472261e-35),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.36599681872296382199486815169747516110e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.86330544720458446620644055149504593514e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.23793435123936109978741252388806998743e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41867848381419339587560909646887411175e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46493580168298395584601073832432583371e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03537470107933172406224278121518518287e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.00375884189083338846655326801486182158e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62515041281615938158455493618149216047e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42337949780187570810208014464208536484e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40196890950995648233637422892711654146e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.11894256193449973803773962108906527772e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00900961462911160554915139090711911885e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.65825716532665817972751320034032284421e-32),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -256) {
+        RealType t = -log2(ldexp(p, 128));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.3756e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.04845570631944023525776899386112795330e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.18146685146173151383718092529868406030e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.13255476532916847606354932879190731233e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.44228173780288838603949849889291143631e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.18663599891768480607165516401619315227e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.61193813386433438633008774630150180359e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.54402603714659392010463991032389692959e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.76816632514967325885563032378775486543e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.22774672528068516513970610441705738842e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.47312348271366325243169398780745416279e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.10984325972747808970318612951079014854e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.81620524028936785168005732104270722618e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.03443227423068771484783389914203726108e-29),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.02313300749670214384591200940841254958e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.40321396496046206171642334628524367374e-39),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.67292043485384876322219919215413286868e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.61652550158809553935603664087740554258e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14722810796821047167211543031044501921e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.78954660078305461714050086730116257387e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.52794892087101750452585357544956835504e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59652255206657812422503741672829368618e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.84914442937017449248597857220675602148e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.23662183613814475007146598734598810102e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.55388618781592901470236982277678753407e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.20418178834057564300014964843066904024e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48606639104883413456676877330419513129e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39845313960416564778273486179935754019e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14538443937605324316706211070799970095e-35),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -512) {
+        RealType t = -log2(ldexp(p, 256));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.6639e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.32865827226175697711590794217590458484e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.27551166309670994513910580518431041518e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.57161482253140058637495100797888501265e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.26908727392152312216118985392395130974e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.31391169101865809627389212651592902649e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.58926174475498352884244229017384309804e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.97074898765380614681225071978849430802e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.59852103341093122669197704225736036199e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.97287954178083606552531325613580819555e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.57634773176875526612407357244997035312e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.34467233210713881817055138794482883359e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.91236170873875898506577053309622472122e-29),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.12220990075567880730037575497818287435e-33),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.20881889651487527801970182542596258873e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.56848486878078288956741060120464349537e-43),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33267370502089423930888060969568705647e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39632352029900752622967578086289898150e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42703739831525305516280300008439396218e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45767617531685380458878368024246654652e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40344962756110545138002101382437142038e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.47015258371290492450093115369080460499e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.97280918936580227687603348219414768787e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40441024286579579491205384492088325576e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26130000914236204012152918399995098882e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.03893990417552709151955156348527062863e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.12687002255767114781771099969545907763e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74218645918961186861014420578277888513e-35),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36897549168687040570349702061165281706e-39),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -1024) {
+        RealType t = -log2(ldexp(p, 512));
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.0360e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.67937186583822375017526293948703697225e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.06681764321003187068904973985967908140e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.12508887240195683379004033347903251977e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.56847823157999127998977939588643284176e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.31281869105767454049413701029676766275e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.01498951390503036399081209706853095793e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.72866867304007090391517734634589972858e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.44819806993104486828983054294866921869e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.91441365806306460165885645136864045231e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.71364347097027340365042558634044496149e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.63601648491144929836375956218857970640e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.76948956673676441236280803645598939735e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.47367651137203634311843318915161504046e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.83187517541957887917067558455828915184e-41),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.21480186561579326423946788448005430367e-47),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16494591376838053609854716130343599036e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.97651667629616309497454026431358820357e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77741398674456235952879526959641925087e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.39478109667902532743651043316724748827e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35965905056304225411108295866332882930e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83206524996481183422082802793852630990e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30320324476590103123012385840054658401e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.30322948189714718819437477682869360798e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43729790298535728717477691270336818161e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.89788697114251966298674871919685298106e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.43510901856942238937717065880365530871e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38232043389918216652459244727737381677e-38),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64599465785019268214108345671361994702e-43),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -2048) {
+        RealType t = -log2(ldexp(p, 1024));
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.5130e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.11959316095291435774375635827672517008e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.30292575371366023255165927527306483022e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.54260157439166096303943109715675142318e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.61232819199170639867079290977704351939e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.74481972848503486840161528924694379655e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.98283577906243441829434029827766571263e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.90691715740583828769850056130458574520e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.31354728925505346732804698899977180508e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.42545288372836698650371589645832759416e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.12901629676328680681102537492164204387e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.99690040253176100731314099573187027309e-31),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.07899506956133955785140496937520311210e-35),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.88607240095256436460507438213387199067e-40),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.78815984551154095621830792130401294111e-45),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.20516030880916148179297554212609531432e-51),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.81973541224235020744673910266545976833e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49156990107280109344880219729275939242e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21606730563542176411852745162267260946e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11449329529433741944366607648360521674e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35659138507940819452801802756409587220e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.95708516718550485872934856595725983907e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78871290972777009292563576533612002908e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60955318960542258732596447917271198307e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72437581728117963125690670426402194936e-29),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77473938855958487119889840032590783232e-33),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66198494713809467076392278745811981500e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.34142897042941614778352280692901008538e-42),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98791658647635156162347063765388728959e-47),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4096) {
+        RealType t = -log2(ldexp(p, 2048));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.1220e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.67307564006689676593687414536012112755e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.21005690250741024744367516466433711478e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.11537345869365655126739041291119096558e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.82910297156061391001507891422501792453e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.51574933516708249049824513935386420692e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.56433757960363802088718489136097249753e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.74248235865301086829849817739500215149e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.19151506367084295119369434315371762091e-22),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.98518959000360170320183116510466814569e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.21330422702314763225472001861559380186e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.44346922987964428874014866468161821471e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.59773305152191273570416120169527607421e-39),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.81894412321532723356954669501665983316e-44),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.25766346856943928908756472385992861288e-49),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.44601448994095786447982489957909713982e-55),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.90814053669730896497462224007523900520e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.72450689508305973756255440356759005330e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76517273086984384225845151573287252506e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31844251885317815627707511621078762352e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.22689324865113257769413663010725436393e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27524826460469866934006001123700331335e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39172850948322201614266822896191911031e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40343613937272428197414545004329993769e-27),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.17936773028976355507339458927541970545e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66512598049860260933817550698863263184e-36),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.06432209670481882442649684139775366719e-41),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.10247886333916820534393624270217678968e-46),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40899321122058714028548211810431871877e-51),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -8192) {
+        RealType t = -log2(ldexp(p, 4096));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.8974e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.36960987939726803544369406181770745475e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.17239311695985070524235502979761682692e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.51192208811996535244435318068035492922e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.38938553034896173195617671475670860841e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.55156910900732478717648524688116855303e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.14905040433940475292279950923000597280e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -7.35237464492052939771487320880614968639e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.07937000518607459141766382199896010484e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.42797358100745086706362563988598447929e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.27871637324128856529004325499921407260e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.99553669724530906250814559570819049401e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.04272644552406682186928100080598582627e-42),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.70093315570856725077212325128817808000e-47),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.51767319872105260145583037426067406953e-53),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.87159268409640967747617639113346310759e-59),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45350026842128595165328480395513258721e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.30398532102631290226106936127181928207e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.45242264812189519858570105609209495630e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.22745462510042159972414219082495434039e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31834645038348794443252730265421155969e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44590392528760847619123404904356730177e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08436623062305193311891193246627599030e-25),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.46542554766048266351202730449796918707e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78672067064478133389628198943161640913e-34),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.56573050761582685018467077197376031818e-39),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.92145137276530136848088270840255715047e-44),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96933863894082533505471662180541379922e-49),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.92661972206232945959915223259585457082e-55),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -16384) {
+        RealType t = -log2(ldexp(p, 8192));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.8819e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.24662793339079714510108682543625432532e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.25841960642102016210295419419373971750e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.10589156998251704634852108689102850747e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.18697614924486382142056819421294206504e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.79445222262445726654186491785652765635e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.41847338407338901513049755299049551186e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.44550500540299259432401029904726959214e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.97463434518480676079167684683604645092e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.68404349202062958045327516688040625516e-30),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.14018837476359778654965300153810397742e-35),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.67726222606571327724434861967972555751e-40),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.48082398191886705229604237754446294033e-45),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.28534456209055262678153908192583037946e-51),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.99466700145428173772768099494881455874e-57),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.43473066278196981345209422626769148425e-63),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.26570199429958856038191879713341034013e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32484776300757286079244074394356908390e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.31234182027812869096733088981702059020e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13711443044675425837293030288097468867e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.11480036828082409994688474687120865023e-19),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.25592803132287127389756949487347562847e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.45726006695535760451195102271978072855e-28),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13082170504731110487003517418453709982e-32),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.48217827031663836930337143509338210426e-37),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.47389053144555736191304002865419453269e-42),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90980968229123572201281013063229644814e-47),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79448544326289688123648457587797649323e-53),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.56179793347045575604935927245529360950e-59),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        result = -boost::math::numeric_limits<RealType>::infinity();
+    }
+
+    return result;
+}
+
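+// Editor's note, not part of the original patch: tools::evaluate_polynomial
+// evaluates these arrays by Horner's rule with the constant term first, so a
+// hand-rolled equivalent (illustration only) would be:
+//
+//     #include <cstddef>
+//     template <typename Real, std::size_t N>
+//     Real horner(const Real (&c)[N], Real t) {
+//         Real s = c[N - 1];                   // highest-order coefficient
+//         for (std::size_t i = N - 1; i > 0; --i) { s = s * t + c[i - 1]; }
+//         return s;
+//     }
+//
+// making each branch result = horner(P, t) / horner(Q, t).
+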
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.25) {
+        RealType t = p - static_cast<RealType>(0.25);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 1.8559e-18
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(4.81512108276093785320e-1),
+            static_cast<RealType>(-2.74296316128959647914e0),
+            static_cast<RealType>(-3.29973875964825685757e1),
+            static_cast<RealType>(-4.87536980816224603581e1),
+            static_cast<RealType>(8.22233203036734027999e1),
+            static_cast<RealType>(1.21654607908452130093e2),
+            static_cast<RealType>(-6.66681853240657307279e1),
+            static_cast<RealType>(-4.28101952511581488588e1),
+        };
+        BOOST_MATH_STATIC const RealType Q[10] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.20189490825315245036e0),
+            static_cast<RealType>(1.63469912146101848441e1),
+            static_cast<RealType>(-1.52740920318273920072e1),
+            static_cast<RealType>(-5.41684560257839409762e1),
+            static_cast<RealType>(6.51733677169299416471e0),
+            static_cast<RealType>(3.93092001388102589237e1),
+            static_cast<RealType>(-9.59983666140749481195e-1),
+            static_cast<RealType>(-9.95648827557655863699e-1),
+            static_cast<RealType>(-1.32007124426778083829e0),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - static_cast<RealType>(0.125);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 4.6019e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(1.70276979914029733585e0),
+            static_cast<RealType>(2.09991992116646276165e1),
+            static_cast<RealType>(2.26775403775298867998e1),
+            static_cast<RealType>(-4.85384304722129472833e2),
+            static_cast<RealType>(-1.47107146466495573999e3),
+            static_cast<RealType>(-7.08748473959943943929e1),
+            static_cast<RealType>(1.54245210917147215257e3),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(2.13092357122115486375e1),
+            static_cast<RealType>(1.57318281834689144053e2),
+            static_cast<RealType>(4.42261730187813035957e2),
+            static_cast<RealType>(2.10814431586717588454e2),
+            static_cast<RealType>(-6.36700983439599552504e2),
+            static_cast<RealType>(-2.82923881266630617596e2),
+            static_cast<RealType>(1.36613971025062750340e2),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2193e-19
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(4.25692449785074345588e-1),
+            static_cast<RealType>(3.10963501706596356267e-1),
+            static_cast<RealType>(2.91357806215297069863e-2),
+            static_cast<RealType>(2.34716342676849303244e-2),
+            static_cast<RealType>(5.83137296293361915583e-3),
+            static_cast<RealType>(3.71792415497884868748e-4),
+            static_cast<RealType>(1.59538372221030642757e-4),
+            static_cast<RealType>(4.74040834029330213692e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(4.14801234415100707213e-1),
+            static_cast<RealType>(1.04693730144480856638e-1),
+            static_cast<RealType>(3.81581484862997435076e-2),
+            static_cast<RealType>(8.95334009127358617362e-3),
+            static_cast<RealType>(1.43316686981760147226e-3),
+            static_cast<RealType>(1.81367766024620080990e-4),
+            static_cast<RealType>(1.54779999748286671973e-5),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.4418e-17
+        BOOST_MATH_STATIC const RealType P[11] = {
+            static_cast<RealType>(5.07341098045260541890e-1),
+            static_cast<RealType>(3.11771145411143166935e-1),
+            static_cast<RealType>(1.74515601081894060888e-1),
+            static_cast<RealType>(8.46576990174024231338e-2),
+            static_cast<RealType>(2.57510090204322149315e-2),
+            static_cast<RealType>(8.26605326867021684811e-3),
+            static_cast<RealType>(1.73081423934722046819e-3),
+            static_cast<RealType>(3.36314161099011673569e-4),
+            static_cast<RealType>(4.50990441180388912803e-5),
+            static_cast<RealType>(4.53513191985642134268e-6),
+            static_cast<RealType>(2.62304611053075404923e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[11] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.28225379952156944029e-1),
+            static_cast<RealType>(3.49662079845715371907e-1),
+            static_cast<RealType>(1.45408903426879603625e-1),
+            static_cast<RealType>(5.06773501409016231879e-2),
+            static_cast<RealType>(1.45385556714043243731e-2),
+            static_cast<RealType>(3.31235831325018043744e-3),
+            static_cast<RealType>(6.06977554525543056050e-4),
+            static_cast<RealType>(8.42406730405209749492e-5),
+            static_cast<RealType>(8.32337989541696717905e-6),
+            static_cast<RealType>(4.84923196546857128337e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.7932e-17
+        BOOST_MATH_STATIC const RealType P[10] = {
+            static_cast<RealType>(5.41774626094491510395e-1),
+            static_cast<RealType>(4.11060141334529017898e-1),
+            static_cast<RealType>(1.48195601801946264526e-1),
+            static_cast<RealType>(3.33881552814492855873e-2),
+            static_cast<RealType>(5.20893974732203890418e-3),
+            static_cast<RealType>(5.84734765774178832854e-4),
+            static_cast<RealType>(4.71028150898133935445e-5),
+            static_cast<RealType>(2.59185739450631464618e-6),
+            static_cast<RealType>(7.77428184258777394627e-8),
+            static_cast<RealType>(2.51255632629650930196e-14),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(7.58341767924960527280e-1),
+            static_cast<RealType>(2.73511775500642961539e-1),
+            static_cast<RealType>(6.16011987856129890130e-2),
+            static_cast<RealType>(9.61296002312356116021e-3),
+            static_cast<RealType>(1.07890675777726076554e-3),
+            static_cast<RealType>(8.69223632953458271977e-5),
+            static_cast<RealType>(4.78248875031756169279e-6),
+            static_cast<RealType>(1.43460852065144859304e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.0396e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(5.41926067826974905066e-1),
+            static_cast<RealType>(4.86926556246548518715e-1),
+            static_cast<RealType>(2.11963908288176005856e-1),
+            static_cast<RealType>(5.92200639925655576883e-2),
+            static_cast<RealType>(1.18859816815542567438e-2),
+            static_cast<RealType>(1.76833662992855443754e-3),
+            static_cast<RealType>(2.21226152157950219596e-4),
+            static_cast<RealType>(1.50444847316426133872e-5),
+            static_cast<RealType>(1.87458213915373906356e-6),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.98511036742503939380e-1),
+            static_cast<RealType>(3.91130673008184655152e-1),
+            static_cast<RealType>(1.09277016228474605069e-1),
+            static_cast<RealType>(2.19328471889880028208e-2),
+            static_cast<RealType>(3.26305879571349016107e-3),
+            static_cast<RealType>(4.08222014684743492069e-4),
+            static_cast<RealType>(2.77611385768697969181e-5),
+            static_cast<RealType>(3.45911046256304795257e-6),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else {
+        RealType p_square = p * p;
+
+        if ((boost::math::isnormal)(p_square)) {
+            result = 1 / cbrt(p_square * constants::two_pi<RealType>());
+        }
+        else if (p > 0) {
+            result = 1 / (cbrt(p) * cbrt(p) * cbrt(constants::two_pi<RealType>()));
+        }
+        else {
+            result = boost::math::numeric_limits<RealType>::infinity();
+        }
+    }
+
+    return result;
+}
+
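+// Editor's note, not part of the original patch: the closing branch above uses
+// the closed form x = (2*pi*p^2)^(-1/3), i.e. the leading-order inversion of a
+// survival function decaying like x^(-3/2) / sqrt(2*pi); when p*p would
+// underflow to a subnormal, the cube root is instead taken factor by factor so
+// the result remains finite.
+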
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (p >= 0.375) {
+        RealType t = p - static_cast<RealType>(0.375);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 4.0835e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.00474815142578902619056852805926666121e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.56422290947427848191079775267512708223e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.70103710180837859003070678080056933649e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.08521918131449191445864593768320217287e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29340655781369686013042530147130581054e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.24198237124638368989049118891909723118e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.43382878809828906953609389440800537385e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.45564809127564867825118566276365267035e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.75881247317499884393790698530115428373e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.55845932095942777602241134226597158364e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.41328261385867825781522154621962338450e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06758225510372847658316203115073730186e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.10895417312529385966062255102265009972e0),
+        };
+        BOOST_MATH_STATIC const RealType Q[12] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.88252553879196710256650370298744093367e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.54875259600848880869571364891152935969e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.78589587338618424770295921221996471887e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.15356831947775532414727361010652423453e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12951532118504570745988981200579372124e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48163841544376327168780999614703092433e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.56786609618056303930232548304847911521e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.25610739352108840474197350343978451729e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27063786175330237448255839666252978603e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.11941093895004369510720986032269722254e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.51487618026728514833542002963603231101e1),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.25) {
+        RealType t = p - static_cast<RealType>(0.25);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 5.7633e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.81512108276093787175849069715334402323e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24417080443497141096829831516758083481e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.67006165991083501886186268944009973084e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.74402382755828993223083868408545308340e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.49182541725192134610277727922493871787e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.67273564707254788337557775618297381267e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.73476432616329813096120568871900178919e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31235376166262024838125198332476698090e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.59379285677781413393733801325840617522e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.38151434050794836595564739176884302539e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33534676810383673962443893459127818078e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.38110822236764293910895765875742805411e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.42750073722992463087082849671338957023e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.54255748148299874514839812717054396793e2),
+        };
+        BOOST_MATH_STATIC const RealType Q[13] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64823387375875361292425741663822893626e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02973633484731117050245517938177308809e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71288209768693917630236009171518272534e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.23837527610546426062625864735895938014e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.05056816585729983223036277071927165555e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.48087477651935811184913947280572029967e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04631058325147527913398256133791276127e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69813394441679590721342220435891453447e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.92323371456465893290687995174952942311e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.68542430563281320943284015587559056621e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.17969051793607842221356465819951568080e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82773308760283383020168853159163391394e2),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (p >= 0.125) {
+        RealType t = p - static_cast<RealType>(0.125);
+
+        // Rational Approximation
+        // Maximum Absolute Error: 2.1140e-36
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[15] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70276979914029738186601698003670175907e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.63126762626382548478172664328434577553e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04190225271045202674546813475341133174e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.93523974140998850492859698545966806498e3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19814006186501010136822066747124777014e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.55931423620290859807616748030589502039e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.78874021192395317496507459296221703565e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.03860533237347587977439662522389465152e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.77882648875352690605815508748162607271e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.05498612167816258406694194925933958145e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05326361485692298778330190198630232666e7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.85827791876754731187453265804790139032e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.93719378006868242377955041137674308589e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.56839957539576784391036362196229047625e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95604329277359828898502487252547842378e6),
+        };
+        BOOST_MATH_STATIC const RealType Q[16] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79208640567193066236912382037923299779e1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94775812217734059201656828286490832145e2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16467934643564936346029555887148320030e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.35525720248600096849901920839060920346e4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69760913594243328874861534307039589127e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.32330501005950982838953061458838040612e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.79610639577090112327353399739315606205e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.43314292923292828425630915931385776182e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.97538885038058371436244702169375622661e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.48431896958634429210349441846613832681e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.93459449030820736960297236799012798749e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.67200014823529787381847745962773726408e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.37035571075060153491151970623824940994e6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.22682822001329636071591164177026394518e5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09781406768816062486819491582960840983e4),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 3));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.1409e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.25692449785074345466504245009175450649e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75679137667345136118441108839649360362e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06171803174020856964914440692439080669e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.87798066278592051163038122952593080648e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20070543183347459409303407166630392077e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13457391270614708627745403376469848816e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06743974464224003715510181633693539914e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16870984737226212814217822779976770316e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21845093091651861426944931268861694026e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.85357146081877929591916782097540632519e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.19085800299127898508052519062782284785e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41985644250494046067095909812634573318e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30857042700765443668305406695750760693e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.10466412567107519640190849286913680449e-10),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31914248618040435028023418981527961171e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.73578090645412656850163531828709850171e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.57329813782272411333511950903192234311e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62736127875896578315177123764520823372e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76809643836078823237530990091078867553e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32026948719622983920194944841520771986e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.45051018027743807545734050620973716634e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.58281707210621813556068724127478674938e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.63884527227517358294732620995363921547e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15973602356223075515067915930205826229e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.35069439950884795002182517078104942615e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15454119109586223908613596754794988609e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.55273685376557721039847456564342945576e-10),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 1.2521e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[23] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.07341098045260497471001948654506267614e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16518383383878659278973043343250842753e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.10029094208424121908983949243560936013e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.04771840726172284780129819470963100749e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34173170868011689830672637082451998700e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41990262178664512140746911398264330173e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.06779488545758366708787010705581103705e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41892665233583725631482443019441608726e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.20692306716979208762785454648538891867e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.11097906809673639231336894729060830995e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.37476591232600886363441107536706973169e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.02659053066396720145189153810309784416e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.02209877191642023279303996697953314344e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.56663781532392665205516573323950583901e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95655734237060800145227277584749429063e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.06357695252098035545383649954315685077e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.78759045059235560356343893064681290047e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.95881339136963512103591745337914059651e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70156441275519927563064848389865812060e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.99745225746277063516394774908346367811e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.45718440382347867317547921045052714102e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.39027665085346558512961348663034579801e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05739751797738770096482688062542436470e-15),
+        };
+        BOOST_MATH_STATIC const RealType Q[23] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43370372582239919321785765900615222895e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.52872159582703775260145036441128318159e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28243735290178057451806192890274584778e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.93375398009812888642212045868197435998e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.73364866677217419593129631900708646445e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53645928499107852437053167521160449434e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74280939589407863107682593092148442428e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.80095449855178765594835180574448729793e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.65924845456946706158946250220103271334e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52170861715436344002253767944763106994e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.87246437551620484806338690322735878649e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.88631873230311653853089809596759382095e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.84478812152918182782333415475103623486e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.47998768403859674841488325856607782853e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.82364683269852480160620586102339743788e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.65854316058742127585142691993199177898e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11358340340462071552670838135645042498e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22818744671190957896035448856159685984e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11038729491846772238262374112315536796e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.21355801166652957655438257794658921155e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.41278271853370874105923461404291742454e-14),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95100373579692323015092323646110838623e-15),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.0703e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[21] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.41774626094491452462664949805613444094e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.96383089261273022706449773421031102175e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.16315295073029174376617863024082371446e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.65377894193914426949840018839915119410e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33210993830236821503160637845009556016e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69315463529653886947182738378630780083e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09869947341518160436616160018702590834e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44331691052908906654005398143769791881e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13131413925652085071882765653750661678e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.64441840437413591336927030249538399459e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78393581596372725434038621824715039765e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.50239319821178575427758224587858938204e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92619647697287767235953207451871137149e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26901081456833267780600560830367533351e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.12151768312254597726918329997945574766e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36907507996686107513673694597817437197e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31699373909892506279113260845246144240e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.11230682511893290562864133995544214588e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.44627067257461788044784631155226503036e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39869585157420474301450400944478312794e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.82128612844034824876694595066123093042e-27),
+        };
+        BOOST_MATH_STATIC const RealType Q[20] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65414405277042133067228113526697909557e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32179221250476209346757936207079534440e0),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.74217392682100275524983756207618144313e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.45810448055940046896534973720645113799e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.81487408603233765436807980794697048675e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49442843848941402948883852684502731460e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66330842256792791665907478718489013963e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.93285292223845804061941359223505045576e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.87966347754794288681626114849829710697e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13711644429711675111080150193733607164e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.61758862007482013187806625777101452737e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.55435556106272558989915248980090731639e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34166571320580242213843747025082914011e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31411360099525131959755145015018410429e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.21684839228785650625270026640716752452e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.43021096301255274530428188746599779008e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.58831290247776456235908211620983180005e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74309280855806399632683315923592902203e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.58097100528573186098159133443927182780e-18),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.4124e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[19] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.41926067826974814669251179264786585885e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.21141529920003643675474888047093566280e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59964592861304582755436075901659426485e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.95135112971576806260593571877646426022e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.12322024725362032809787183337883163254e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.96758465518847580191799508363466893068e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12389553946694902774213055563291192175e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04599236076217479033545023949602272721e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.16143771174487665823565565218797804931e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.38966874413947625866830582082846088427e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02590325514935982607907975481732376204e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44376747400143802055827426602151525955e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.82088624006657184426589019067893704020e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95757210706845964048697237729100056232e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.36096213291559182424937062842308387702e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14362780521873256616533770657488533993e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73571098395815275003552523759665474105e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47286214854389274681661944885238913581e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.73701196181204039400706651811524874455e-34),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.77119890916406072259446489508263892540e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95177888809731859578167185583119074026e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.29131027214559081111011582466619105016e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31442657037887347262737789825299661237e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83928863984637222329515960387531101267e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07389089078167127136964851949662391744e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93013847797006474150589676891548600820e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50600573851533884594030683413819219915e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94539484213971921794449107859541806317e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.58360895032645281635534287874266252341e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66414102108217999886628042310332365446e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07411076181287950822375436854492998754e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61224937285582228022463072515935601355e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.89242339209389981530783624934733098598e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11030225010379194015550512905872992373e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20285566539355859922818448335043495666e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71782855576364068752705740544460766362e-20),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else if (ilogb(p) >= -64) {
+        RealType t = -log2(ldexp(p, 32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.1680e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.41926070139289008206183757488364846894e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.78434820569480998586988738136492447574e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07939171933509333571821660328723436210e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.92438439347811482522082798370060349739e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24288918322433485413615362874371441367e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04437759300344740815274986587186340509e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74063952231188399929705762263485071234e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.07228849610363181194047955109059900544e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93120850707001212714821992328252707694e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40911049607879914351205073608184243057e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71898232013947717725198847649536278438e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06963706982203753050300400912657068823e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.79849166632277658631839126599110199710e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.74682085785152276503345630444792840850e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09650236336641219916377836114077389212e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97326394822836529817663710792553753811e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.26635728806398747570910072594323836441e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.96470010392255781222480229189380065951e-18),
+        };
+        BOOST_MATH_STATIC const RealType Q[18] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.82841492468725267177870050157374330523e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83703946702662950408034486958999188355e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09320896703777230915306208582393356690e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29346630787642344947323515884281464979e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77242894492599243245354774839232776944e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.05722029871614922850936250945431594997e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.66920224988248720006255827987385374411e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.40887155754772190509572243444386095560e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44545968319921473942351968892623238920e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.17198676140022989760684932594389017027e-8),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97376935482567419865730773801543995320e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06997835790265899882151030367297786861e-10),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.06862653266619706928282319356971834957e-12),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.02334307903766790059473763725329176667e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.33174535634931487079630169746402085699e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70989324903345102377898775620363767855e-16),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.47067260145014475572799216996976703615e-18),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * cbrt(p * p));
+    }
+    else {
+        RealType p_square = p * p;
+
+        if ((boost::math::isnormal)(p_square)) {
+            result = 1 / cbrt(p_square * constants::two_pi<RealType>());
+        }
+        else if (p > 0) {
+            result = 1 / (cbrt(p) * cbrt(p) * cbrt(constants::two_pi<RealType>()));
+        }
+        else {
+            result = boost::math::numeric_limits<RealType>::infinity();
+        }
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 53>& tag)
+{
+    if (p > 0.5) {
+        return !complement ? mapairy_quantile_upper_imp_prec(1 - p, tag) : mapairy_quantile_lower_imp_prec(1 - p, tag);
+    }
+
+    return complement ? mapairy_quantile_upper_imp_prec(p, tag) : mapairy_quantile_lower_imp_prec(p, tag);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 113>& tag)
+{
+    if (p > 0.5) {
+        return !complement ? mapairy_quantile_upper_imp_prec(1 - p, tag) : mapairy_quantile_lower_imp_prec(1 - p, tag);
+    }
+
+    return complement ? mapairy_quantile_upper_imp_prec(p, tag) : mapairy_quantile_lower_imp_prec(p, tag);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_quantile_imp(const mapairy_distribution<RealType, Policy>& dist, const RealType& p, bool complement)
+{
+    // This routine implements the quantile for the Map-Airy distribution;
+    // the value p may be the probability, or its complement if complement=true.
+
+    constexpr auto function = "boost::math::quantile(mapairy<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_probability(function, p, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * mapairy_quantile_imp_prec(p, complement, tag_type());
+
+    return result;
+}
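// ---------------------------------------------------------------------------
// Editorial illustration (not part of the patch): the dispatcher above folds
// p > 0.5 into the opposite tail, so the rational approximations only ever
// see arguments in (0, 0.5].  Through the public interface defined later in
// this header, that symmetry means quantile(d, p) and
// quantile(complement(d, 1 - p)) must agree:
#include <boost/math/distributions/mapairy.hpp>
#include <cstdio>

int main()
{
    boost::math::mapairy d(0.0, 1.0); // location 0, scale 1
    const double p = 0.9;
    double q1 = boost::math::quantile(d, p);
    double q2 = boost::math::quantile(boost::math::complement(d, 1 - p));
    std::printf("%.17g\n%.17g\n", q1, q2); // equal up to rounding of 1 - p
}
// ---------------------------------------------------------------------------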
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_mode_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(-1.16158727113597068525);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_mode_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.1615872711359706852500000803029112987);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_mode_imp(const mapairy_distribution<RealType, Policy>& dist)
+{
+    // This implements the mode for the Map-Airy distribution.
+
+    constexpr auto function = "boost::math::mode(mapairy<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * mapairy_mode_imp_prec<RealType>(tag_type());
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_median_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(-0.71671068545502205332);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_median_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, -0.71671068545502205331700196278067230944440);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_median_imp(const mapairy_distribution<RealType, Policy>& dist)
+{
+    // This implements the median for the Map-Airy distribution.
+
+    constexpr auto function = "boost::math::median(mapairy<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * mapairy_median_imp_prec<RealType>(tag_type());
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_entropy_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(2.00727681841065634600);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_entropy_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.0072768184106563460003025875575283708);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mapairy_entropy_imp(const mapairy_distribution<RealType, Policy>& dist)
+{
+    // This implements the entropy for the Map-Airy distribution.
+
+    constexpr auto function = "boost::math::entropy(mapairy<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The Map-Airy distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = mapairy_entropy_imp_prec<RealType>(tag_type()) + log(scale);
+
+    return result;
+}
+
+} // detail
+
+template <class RealType = double, class Policy = policies::policy<> >
+class mapairy_distribution
+{
+    public:
+    typedef RealType value_type;
+    typedef Policy policy_type;
+
+    BOOST_MATH_GPU_ENABLED mapairy_distribution(RealType l_location = 0, RealType l_scale = 1)
+        : mu(l_location), c(l_scale)
+    {
+        constexpr auto function = "boost::math::mapairy_distribution<%1%>::mapairy_distribution";
+        RealType result = 0;
+        detail::check_location(function, l_location, &result, Policy());
+        detail::check_scale(function, l_scale, &result, Policy());
+    } // mapairy_distribution
+
+    BOOST_MATH_GPU_ENABLED RealType location()const
+    {
+        return mu;
+    }
+    BOOST_MATH_GPU_ENABLED RealType scale()const
+    {
+        return c;
+    }
+
+    private:
+    RealType mu;  // The location parameter.
+    RealType c;   // The scale parameter.
+};
+
+typedef mapairy_distribution<double> mapairy;
+
+#ifdef __cpp_deduction_guides
+template <class RealType>
+mapairy_distribution(RealType) -> mapairy_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+template <class RealType>
+mapairy_distribution(RealType, RealType) -> mapairy_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+#endif
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const mapairy_distribution<RealType, Policy>&)
+{ // Range of permissible values for random variable x.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const mapairy_distribution<RealType, Policy>&)
+{ // Range of supported values for random variable x.
+    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-tools::max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
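// ---------------------------------------------------------------------------
// Editorial illustration (not part of the patch): constructing the
// distribution and querying its support.  With the C++17 deduction guides
// above, the RealType template argument is inferred from the constructor
// arguments.
#include <boost/math/distributions/mapairy.hpp>
#include <cstdio>

int main()
{
    boost::math::mapairy_distribution dist(1.0, 2.0); // deduced as <double>
    auto s = boost::math::support(dist);
    std::printf("support: (%g, %g)\n", s.first, s.second); // (-inf, +inf)
}
// ---------------------------------------------------------------------------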
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const mapairy_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::mapairy_pdf_imp(dist, x);
+} // pdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const mapairy_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::mapairy_cdf_imp(dist, x, false);
+} // cdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const mapairy_distribution<RealType, Policy>& dist, const RealType& p)
+{
+    return detail::mapairy_quantile_imp(dist, p, false);
+} // quantile
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<mapairy_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::mapairy_cdf_imp(c.dist, c.param, true);
+} // cdf complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<mapairy_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::mapairy_quantile_imp(c.dist, c.param, true);
+} // quantile complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mean(const mapairy_distribution<RealType, Policy>& dist)
+{
+    return dist.location();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType variance(const mapairy_distribution<RealType, Policy>& /*dist*/)
+{
+    return boost::math::numeric_limits<RealType>::infinity();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mode(const mapairy_distribution<RealType, Policy>& dist)
+{
+    return detail::mapairy_mode_imp(dist);
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType median(const mapairy_distribution<RealType, Policy>& dist)
+{
+    return detail::mapairy_median_imp(dist);
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const mapairy_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no skewness:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Map-Airy Distribution has no skewness");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::skewness(mapairy<%1%>&)",
+        "The Map-Airy distribution does not have a skewness: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy()); // infinity?
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const mapairy_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no kurtosis:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Map-Airy Distribution has no kurtosis");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::kurtosis(mapairy<%1%>&)",
+        "The Map-Airy distribution does not have a kurtosis: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const mapairy_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no kurtosis excess:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The Map-Airy Distribution has no kurtosis excess");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::kurtosis_excess(mapairy<%1%>&)",
+        "The Map-Airy distribution does not have a kurtosis excess: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const mapairy_distribution<RealType, Policy>& dist)
+{
+    return detail::mapairy_entropy_imp(dist);
+}
+
+}} // namespaces
+
+
+#endif // BOOST_STATS_MAPAIRY_HPP
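// ---------------------------------------------------------------------------
// Editorial illustration (not part of the patch): a round trip through the
// non-member accessors this header just defined.  cdf and quantile are mutual
// inverses by construction; no numeric outputs are asserted here.
#include <boost/math/distributions/mapairy.hpp>
#include <cstdio>

int main()
{
    boost::math::mapairy d; // standard distribution: location 0, scale 1
    const double x = 0.5;
    double p = boost::math::cdf(d, x);
    std::printf("pdf(%g) = %g\n", x, boost::math::pdf(d, x));
    std::printf("cdf(%g) = %g\n", x, p);
    std::printf("quantile(%g) = %g (recovers x)\n", p, boost::math::quantile(d, p));
    std::printf("median = %g, mode = %g, entropy = %g\n",
                boost::math::median(d), boost::math::mode(d), boost::math::entropy(d));
}
// ---------------------------------------------------------------------------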
diff --git a/include/boost/math/distributions/negative_binomial.hpp b/include/boost/math/distributions/negative_binomial.hpp
index 18eec09939..f520c94803 100644
--- a/include/boost/math/distributions/negative_binomial.hpp
+++ b/include/boost/math/distributions/negative_binomial.hpp
@@ -44,6 +44,10 @@
 #ifndef BOOST_MATH_SPECIAL_NEGATIVE_BINOMIAL_HPP
 #define BOOST_MATH_SPECIAL_NEGATIVE_BINOMIAL_HPP
 
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/cstdint.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/tools/tuple.hpp>
 #include <boost/math/distributions/fwd.hpp>
 #include <boost/math/special_functions/beta.hpp> // for ibeta(a, b, x) == Ix(a, b).
 #include <boost/math/distributions/complement.hpp> // complement.
@@ -51,9 +55,7 @@
 #include <boost/math/special_functions/fpclassify.hpp> // isnan.
 #include <boost/math/tools/roots.hpp> // for root finding.
 #include <boost/math/distributions/detail/inv_discrete_quantile.hpp>
-
-#include <limits> // using std::numeric_limits;
-#include <utility>
+#include <boost/math/policies/policy.hpp>
 
 #if defined (BOOST_MSVC)
 #  pragma warning(push)
@@ -70,7 +72,7 @@ namespace boost
     {
       // Common error checking routines for negative binomial distribution functions:
       template <class RealType, class Policy>
-      inline bool check_successes(const char* function, const RealType& r, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_successes(const char* function, const RealType& r, RealType* result, const Policy& pol)
       {
         if( !(boost::math::isfinite)(r) || (r <= 0) )
         {
@@ -82,7 +84,7 @@ namespace boost
         return true;
       }
       template <class RealType, class Policy>
-      inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_success_fraction(const char* function, const RealType& p, RealType* result, const Policy& pol)
       {
         if( !(boost::math::isfinite)(p) || (p < 0) || (p > 1) )
         {
@@ -94,13 +96,13 @@ namespace boost
         return true;
       }
       template <class RealType, class Policy>
-      inline bool check_dist(const char* function, const RealType& r, const RealType& p, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& r, const RealType& p, RealType* result, const Policy& pol)
      {
        return check_success_fraction(function, p, result, pol)
          && check_successes(function, r, result, pol);
      }
      template <class RealType, class Policy>
-     inline bool check_dist_and_k(const char* function, const RealType& r, const RealType& p, RealType k, RealType* result, const Policy& pol)
+     BOOST_MATH_GPU_ENABLED inline bool check_dist_and_k(const char* function, const RealType& r, const RealType& p, RealType k, RealType* result, const Policy& pol)
      {
        if(check_dist(function, r, p, result, pol) == false)
        {
@@ -117,7 +119,7 @@ namespace boost
      } // Check_dist_and_k
      template <class RealType, class Policy>
-     inline bool check_dist_and_prob(const char* function, const RealType& r, RealType p, RealType prob, RealType* result, const Policy& pol)
+     BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, const RealType& r, RealType p, RealType prob, RealType* result, const Policy& pol)
      {
        if((check_dist(function, r, p, result, pol) && detail::check_probability(function, prob, result, pol)) == false)
        {
@@ -134,7 +136,7 @@ namespace boost
       typedef RealType value_type;
       typedef Policy policy_type;
 
-      negative_binomial_distribution(RealType r, RealType p) : m_r(r), m_p(p)
+      BOOST_MATH_GPU_ENABLED negative_binomial_distribution(RealType r, RealType p) : m_r(r), m_p(p)
       { // Constructor.
         RealType result;
         negative_binomial_detail::check_dist(
@@ -145,21 +147,21 @@ namespace boost
       } // negative_binomial_distribution constructor.
 
       // Private data getter class member functions.
-      RealType success_fraction() const
+      BOOST_MATH_GPU_ENABLED RealType success_fraction() const
      { // Probability of success as fraction in range 0 to 1.
        return m_p;
      }
-     RealType successes() const
+     BOOST_MATH_GPU_ENABLED RealType successes() const
      { // Total number of successes r.
        return m_r;
      }
 
-     static RealType find_lower_bound_on_p(
+     BOOST_MATH_GPU_ENABLED static RealType find_lower_bound_on_p(
        RealType trials,
        RealType successes,
        RealType alpha) // alpha 0.05 equivalent to 95% for one-sided test.
      {
-       static const char* function = "boost::math::negative_binomial<%1%>::find_lower_bound_on_p";
+       constexpr auto function = "boost::math::negative_binomial<%1%>::find_lower_bound_on_p";
       RealType result = 0;  // of error checks.
       RealType failures = trials - successes;
       if(false == detail::check_probability(function, alpha, &result, Policy())
@@ -179,12 +181,12 @@ namespace boost
       return ibeta_inv(successes, failures + 1, alpha, static_cast<RealType*>(nullptr), Policy());
      } // find_lower_bound_on_p
 
-     static RealType find_upper_bound_on_p(
+     BOOST_MATH_GPU_ENABLED static RealType find_upper_bound_on_p(
       RealType trials,
       RealType successes,
       RealType alpha) // alpha 0.05 equivalent to 95% for one-sided test.
      {
-      static const char* function = "boost::math::negative_binomial<%1%>::find_upper_bound_on_p";
+      constexpr auto function = "boost::math::negative_binomial<%1%>::find_upper_bound_on_p";
      RealType result = 0;  // of error checks.
      RealType failures = trials - successes;
      if(false == negative_binomial_detail::check_dist_and_k(
@@ -210,12 +212,12 @@ namespace boost
 
      // Estimate number of trials :
      // "How many trials do I need to be P% sure of seeing k or fewer failures?"
 
-     static RealType find_minimum_number_of_trials(
+     BOOST_MATH_GPU_ENABLED static RealType find_minimum_number_of_trials(
       RealType k,     // number of failures (k >= 0).
       RealType p,     // success fraction 0 <= p <= 1.
       RealType alpha) // risk level threshold 0 <= alpha <= 1.
      {
-      static const char* function = "boost::math::negative_binomial<%1%>::find_minimum_number_of_trials";
+      constexpr auto function = "boost::math::negative_binomial<%1%>::find_minimum_number_of_trials";
      // Error checks:
      RealType result = 0;
      if(false == negative_binomial_detail::check_dist_and_k(
@@ -227,12 +229,12 @@ namespace boost
      return result + k;
      } // RealType find_number_of_failures
 
-     static RealType find_maximum_number_of_trials(
+     BOOST_MATH_GPU_ENABLED static RealType find_maximum_number_of_trials(
       RealType k,     // number of failures (k >= 0).
       RealType p,     // success fraction 0 <= p <= 1.
       RealType alpha) // risk level threshold 0 <= alpha <= 1.
      {
-      static const char* function = "boost::math::negative_binomial<%1%>::find_maximum_number_of_trials";
+      constexpr auto function = "boost::math::negative_binomial<%1%>::find_maximum_number_of_trials";
      // Error checks:
      RealType result = 0;
      if(false == negative_binomial_detail::check_dist_and_k(
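// ---------------------------------------------------------------------------
// Editorial illustration (not part of the patch): the two planning helpers
// above answer "how many trials do I need?" questions.  The inputs below are
// arbitrary examples: k = 10 tolerated failures, success fraction p = 0.5,
// and a 5% risk level (alpha = 0.05, i.e. 95% confidence).
#include <boost/math/distributions/negative_binomial.hpp>
#include <cstdio>

int main()
{
    using boost::math::negative_binomial;
    double t_min = negative_binomial::find_minimum_number_of_trials(10.0, 0.5, 0.05);
    double t_max = negative_binomial::find_maximum_number_of_trials(10.0, 0.5, 0.05);
    std::printf("minimum trials: %g\nmaximum trials: %g\n", t_min, t_max);
}
// ---------------------------------------------------------------------------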
@@ -257,22 +259,22 @@ namespace boost
 #endif
 
    template <class RealType, class Policy>
-   inline const std::pair<RealType, RealType> range(const negative_binomial_distribution<RealType, Policy>& /* dist */)
+   BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const negative_binomial_distribution<RealType, Policy>& /* dist */)
    { // Range of permissible values for random variable k.
      using boost::math::tools::max_value;
-     return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
+     return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
    }
 
    template <class RealType, class Policy>
-   inline const std::pair<RealType, RealType> support(const negative_binomial_distribution<RealType, Policy>& /* dist */)
+   BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const negative_binomial_distribution<RealType, Policy>& /* dist */)
    { // Range of supported values for random variable k.
      // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
      using boost::math::tools::max_value;
-     return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
+     return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // max_integer?
    }
 
    template <class RealType, class Policy>
-   inline RealType mean(const negative_binomial_distribution<RealType, Policy>& dist)
+   BOOST_MATH_GPU_ENABLED inline RealType mean(const negative_binomial_distribution<RealType, Policy>& dist)
    { // Mean of Negative Binomial distribution = r(1-p)/p.
      return dist.successes() * (1 - dist.success_fraction() ) / dist.success_fraction();
    } // mean
@@ -285,14 +287,14 @@ namespace boost
    // Now implemented via quantile(half) in derived accessors.
 
    template <class RealType, class Policy>
-   inline RealType mode(const negative_binomial_distribution<RealType, Policy>& dist)
+   BOOST_MATH_GPU_ENABLED inline RealType mode(const negative_binomial_distribution<RealType, Policy>& dist)
    { // Mode of Negative Binomial distribution = floor[(r-1) * (1 - p)/p]
      BOOST_MATH_STD_USING // ADL of std functions.
      return floor((dist.successes() -1) * (1 - dist.success_fraction()) / dist.success_fraction());
    } // mode
 
    template <class RealType, class Policy>
-   inline RealType skewness(const negative_binomial_distribution<RealType, Policy>& dist)
+   BOOST_MATH_GPU_ENABLED inline RealType skewness(const negative_binomial_distribution<RealType, Policy>& dist)
    { // skewness of Negative Binomial distribution = 2-p / (sqrt(r(1-p))
      BOOST_MATH_STD_USING // ADL of std functions.
      RealType p = dist.success_fraction();
@@ -303,7 +305,7 @@ namespace boost
    } // skewness
 
    template <class RealType, class Policy>
-   inline RealType kurtosis(const negative_binomial_distribution<RealType, Policy>& dist)
+   BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const negative_binomial_distribution<RealType, Policy>& dist)
    { // kurtosis of Negative Binomial distribution
      // http://en.wikipedia.org/wiki/Negative_binomial is kurtosis_excess so add 3
      RealType p = dist.success_fraction();
@@ -312,7 +314,7 @@ namespace boost
    } // kurtosis
 
    template <class RealType, class Policy>
-   inline RealType kurtosis_excess(const negative_binomial_distribution<RealType, Policy>& dist)
+   BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const negative_binomial_distribution<RealType, Policy>& dist)
    { // kurtosis excess of Negative Binomial distribution
      // http://mathworld.wolfram.com/Kurtosis.html table of kurtosis_excess
      RealType p = dist.success_fraction();
@@ -321,7 +323,7 @@ namespace boost
    } // kurtosis_excess
 
    template <class RealType, class Policy>
-   inline RealType variance(const negative_binomial_distribution<RealType, Policy>& dist)
+   BOOST_MATH_GPU_ENABLED inline RealType variance(const negative_binomial_distribution<RealType, Policy>& dist)
    { // Variance of Negative Binomial distribution = r (1-p) / p^2.
      return dist.successes() * (1 - dist.success_fraction())
        / (dist.success_fraction() * dist.success_fraction());
@@ -335,11 +337,11 @@ namespace boost
    // chf of Negative Binomial distribution provided by derived accessors.
 
    template <class RealType, class Policy>
-   inline RealType pdf(const negative_binomial_distribution<RealType, Policy>& dist, const RealType& k)
+   BOOST_MATH_GPU_ENABLED inline RealType pdf(const negative_binomial_distribution<RealType, Policy>& dist, const RealType& k)
    { // Probability Density/Mass Function.
      BOOST_FPU_EXCEPTION_GUARD
 
-     static const char* function = "boost::math::pdf(const negative_binomial_distribution<%1%>&, %1%)";
+     constexpr auto function = "boost::math::pdf(const negative_binomial_distribution<%1%>&, %1%)";
 
      RealType r = dist.successes();
      RealType p = dist.success_fraction();
@@ -361,9 +363,9 @@ namespace boost
    } // negative_binomial_pdf
 
    template <class RealType, class Policy>
-   inline RealType cdf(const negative_binomial_distribution<RealType, Policy>& dist, const RealType& k)
+   BOOST_MATH_GPU_ENABLED inline RealType cdf(const negative_binomial_distribution<RealType, Policy>& dist, const RealType& k)
    { // Cumulative Distribution Function of Negative Binomial.
-     static const char* function = "boost::math::cdf(const negative_binomial_distribution<%1%>&, %1%)";
+     constexpr auto function = "boost::math::cdf(const negative_binomial_distribution<%1%>&, %1%)";
      using boost::math::ibeta; // Regularized incomplete beta function.
      // k argument may be integral, signed, or unsigned, or floating point.
      // If necessary, it has already been promoted from an integral type.
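// ---------------------------------------------------------------------------
// Editorial illustration (not part of the patch): the cdf overloads above
// count failures k before the r-th success; cdf and its complement sum to 1,
// and quantile inverts cdf (subject to the discrete-quantile policy).
#include <boost/math/distributions/negative_binomial.hpp>
#include <cstdio>

int main()
{
    boost::math::negative_binomial nb(5, 0.4); // r = 5 successes, p = 0.4
    const double k = 7;
    double P = boost::math::cdf(nb, k);                          // P(K <= 7)
    double Q = boost::math::cdf(boost::math::complement(nb, k)); // P(K > 7)
    std::printf("P = %g, Q = %g, P + Q = %g\n", P, Q, P + Q);
    std::printf("quantile(P) = %g\n", boost::math::quantile(nb, P)); // ~= 7
}
// ---------------------------------------------------------------------------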
@@ -387,10 +389,10 @@ namespace boost
    } // cdf Cumulative Distribution Function Negative Binomial.
 
    template <class RealType, class Policy>
-   inline RealType cdf(const complemented2_type<negative_binomial_distribution<RealType, Policy>, RealType>& c)
+   BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<negative_binomial_distribution<RealType, Policy>, RealType>& c)
    { // Complemented Cumulative Distribution Function Negative Binomial.
 
-     static const char* function = "boost::math::cdf(const negative_binomial_distribution<%1%>&, %1%)";
+     constexpr auto function = "boost::math::cdf(const negative_binomial_distribution<%1%>&, %1%)";
     using boost::math::ibetac; // Regularized incomplete beta function complement.
     // k argument may be integral, signed, or unsigned, or floating point.
     // If necessary, it has already been promoted from an integral type.
@@ -421,7 +423,7 @@ namespace boost
    } // cdf Cumulative Distribution Function Negative Binomial.
 
    template <class RealType, class Policy>
-   inline RealType quantile(const negative_binomial_distribution<RealType, Policy>& dist, const RealType& P)
+   BOOST_MATH_GPU_ENABLED inline RealType quantile(const negative_binomial_distribution<RealType, Policy>& dist, const RealType& P)
    { // Quantile, percentile/100 or Percent Point Negative Binomial function.
      // Return the number of expected failures k for a given probability p.
@@ -429,7 +431,7 @@ namespace boost
     // MathCAD pnbinom returns the smallest k such that negative_binomial(k, n, p) >= probability.
     // k argument may be integral, signed, or unsigned, or floating point.
     // BUT Cephes/CodeCogs says: finds argument p (0 to 1) such that cdf(k, n, p) = y
-    static const char* function = "boost::math::quantile(const negative_binomial_distribution<%1%>&, %1%)";
+    constexpr auto function = "boost::math::quantile(const negative_binomial_distribution<%1%>&, %1%)";
     BOOST_MATH_STD_USING // ADL of std functions.
 
     RealType p = dist.success_fraction();
@@ -484,7 +486,7 @@ namespace boost
        //
        // Cornish-Fisher Negative binomial approximation not accurate in this area:
        //
-       guess = (std::min)(RealType(r * 2), RealType(10));
+       guess = BOOST_MATH_GPU_SAFE_MIN(RealType(r * 2), RealType(10));
     }
     else
       factor = (1-P < sqrt(tools::epsilon<RealType>())) ? 2 : (guess < 20 ? 1.2f : 1.1f);
@@ -492,7 +494,7 @@ namespace boost
     //
     // Max iterations permitted:
     //
-    std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+    boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
     typedef typename Policy::discrete_quantile_type discrete_type;
     return detail::inverse_discrete_quantile(
       dist,
@@ -506,11 +508,11 @@ namespace boost
    } // RealType quantile(const negative_binomial_distribution dist, p)
 
    template <class RealType, class Policy>
-   inline RealType quantile(const complemented2_type<negative_binomial_distribution<RealType, Policy>, RealType>& c)
+   BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<negative_binomial_distribution<RealType, Policy>, RealType>& c)
    { // Quantile or Percent Point Negative Binomial function.
     // Return the number of expected failures k for a given
     // complement of the probability Q = 1 - P.
-    static const char* function = "boost::math::quantile(const negative_binomial_distribution<%1%>&, %1%)";
+    constexpr auto function = "boost::math::quantile(const negative_binomial_distribution<%1%>&, %1%)";
     BOOST_MATH_STD_USING
 
     // Error checks:
@@ -571,7 +573,7 @@ namespace boost
        //
        // Cornish-Fisher Negative binomial approximation not accurate in this area:
        //
-       guess = (std::min)(RealType(r * 2), RealType(10));
+       guess = BOOST_MATH_GPU_SAFE_MIN(RealType(r * 2), RealType(10));
     }
     else
       factor = (Q < sqrt(tools::epsilon<RealType>())) ? 2 : (guess < 20 ? 1.2f : 1.1f);
@@ -579,7 +581,7 @@ namespace boost
     //
     // Max iterations permitted:
     //
-    std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+    boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
     typedef typename Policy::discrete_quantile_type discrete_type;
     return detail::inverse_discrete_quantile(
       dist,
diff --git a/include/boost/math/distributions/non_central_beta.hpp b/include/boost/math/distributions/non_central_beta.hpp
index 66b12e870a..9dd7d5e60b 100644
--- a/include/boost/math/distributions/non_central_beta.hpp
+++ b/include/boost/math/distributions/non_central_beta.hpp
@@ -1,7 +1,7 @@
 // boost\math\distributions\non_central_beta.hpp
 
 // Copyright John Maddock 2008.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -10,6 +10,10 @@
 #ifndef BOOST_MATH_SPECIAL_NON_CENTRAL_BETA_HPP
 #define BOOST_MATH_SPECIAL_NON_CENTRAL_BETA_HPP
 
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/cstdint.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/tools/tuple.hpp>
 #include <boost/math/distributions/fwd.hpp>
 #include <boost/math/special_functions/gamma.hpp> // for incomplete gamma. gamma_q
 #include <boost/math/distributions/complement.hpp> // complements
@@ -20,6 +24,7 @@
 #include <boost/math/special_functions/fpclassify.hpp> // isnan.
 #include <boost/math/tools/roots.hpp> // for root finding.
 #include <boost/math/tools/series.hpp>
+#include <boost/math/policies/policy.hpp>
 
 namespace boost
 {
@@ -32,14 +37,14 @@ namespace boost
      namespace detail{
 
        template <class T, class Policy>
-       T non_central_beta_p(T a, T b, T lam, T x, T y, const Policy& pol, T init_val = 0)
+       BOOST_MATH_GPU_ENABLED T non_central_beta_p(T a, T b, T lam, T x, T y, const Policy& pol, T init_val = 0)
        {
          BOOST_MATH_STD_USING
         using namespace boost::math;
         //
         // Variables come first:
         //
-        std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+        boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
        T errtol = boost::math::policies::get_epsilon<T, Policy>();
        T l2 = lam / 2;
        //
@@ -86,7 +91,7 @@ namespace boost
        // direction for recursion:
        //
        T last_term = 0;
-       std::uintmax_t count = k;
+       boost::math::uintmax_t count = k;
        for(auto i = k; i >= 0; --i)
        {
           T term = beta * pois;
@@ -120,7 +125,7 @@ namespace boost
             break;
          }
          last_term = term;
-         if(static_cast<std::uintmax_t>(count + i - k) > max_iter)
+         if(static_cast<boost::math::uintmax_t>(count + i - k) > max_iter)
          {
            return policies::raise_evaluation_error("cdf(non_central_beta_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
          }
@@ -129,14 +134,14 @@ namespace boost
        }
 
       template <class T, class Policy>
-      T non_central_beta_q(T a, T b, T lam, T x, T y, const Policy& pol, T init_val = 0)
+      BOOST_MATH_GPU_ENABLED T non_central_beta_q(T a, T b, T lam, T x, T y, const Policy& pol, T init_val = 0)
      {
        BOOST_MATH_STD_USING
       using namespace boost::math;
       //
       // Variables come first:
       //
-      std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+      boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
      T errtol = boost::math::policies::get_epsilon<T, Policy>();
      T l2 = lam / 2;
      //
@@ -185,7 +190,7 @@ namespace boost
      // of the bulk of the sum:
      //
      T last_term = 0;
-     std::uintmax_t count = 0;
+     boost::math::uintmax_t count = 0;
      for(auto i = k + 1; ; ++i)
      {
        poisf *= l2 / i;
@@ -199,7 +204,7 @@ namespace boost
          count = i - k;
          break;
        }
-       if(static_cast<std::uintmax_t>(i - k) > max_iter)
+       if(static_cast<boost::math::uintmax_t>(i - k) > max_iter)
        {
          return policies::raise_evaluation_error("cdf(non_central_beta_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
        }
@@ -213,7 +218,7 @@ namespace boost
        {
          break;
        }
-       if(static_cast<std::uintmax_t>(count + k - i) > max_iter)
+       if(static_cast<boost::math::uintmax_t>(count + k - i) > max_iter)
        {
          return policies::raise_evaluation_error("cdf(non_central_beta_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
        }
@@ -228,7 +233,7 @@ namespace boost
      }
 
     template <class RealType, class Policy>
-    inline RealType non_central_beta_cdf(RealType x, RealType y, RealType a, RealType b, RealType l, bool invert, const Policy&)
+    BOOST_MATH_GPU_ENABLED inline RealType non_central_beta_cdf(RealType x, RealType y, RealType a, RealType b, RealType l, bool invert, const Policy&)
    {
      typedef typename policies::evaluation<RealType, Policy>::type value_type;
      typedef typename policies::normalise<
@@ -283,10 +288,10 @@ namespace boost
    template <class T, class Policy>
    struct nc_beta_quantile_functor
    {
-      nc_beta_quantile_functor(const non_central_beta_distribution<T, Policy>& d, T t, bool c)
+      BOOST_MATH_GPU_ENABLED nc_beta_quantile_functor(const non_central_beta_distribution<T, Policy>& d, T t, bool c)
        : dist(d), target(t), comp(c) {}
 
-      T operator()(const T& x)
+      BOOST_MATH_GPU_ENABLED T operator()(const T& x)
      {
        return comp
          ? T(target - cdf(complement(dist, x)))
@@ -305,10 +310,10 @@ namespace boost
    // heuristics.
    //
    template <class F, class T, class Tol, class Policy>
-   std::pair<T, T> bracket_and_solve_root_01(F f, const T& guess, T factor, bool rising, Tol tol, std::uintmax_t& max_iter, const Policy& pol)
+   BOOST_MATH_GPU_ENABLED boost::math::pair<T, T> bracket_and_solve_root_01(F f, const T& guess, T factor, bool rising, Tol tol, boost::math::uintmax_t& max_iter, const Policy& pol)
    {
      BOOST_MATH_STD_USING
-     static const char* function = "boost::math::tools::bracket_and_solve_root_01<%1%>";
+     constexpr auto function = "boost::math::tools::bracket_and_solve_root_01<%1%>";
     //
     // Set up initial brackets:
     //
@@ -319,7 +324,7 @@ namespace boost
     //
     // Set up invocation count:
     //
-    std::uintmax_t count = max_iter - 1;
+    boost::math::uintmax_t count = max_iter - 1;
 
     if((fa < 0) == (guess < 0 ? !rising : rising))
     {
@@ -332,7 +337,7 @@ namespace boost
        if(count == 0)
        {
          b = policies::raise_evaluation_error(function, "Unable to bracket root, last nearest value was %1%", b, pol); // LCOV_EXCL_LINE
-         return std::make_pair(a, b);
+         return boost::math::make_pair(a, b);
        }
       //
       // Heuristic: every 20 iterations we double the growth factor in case the
@@ -365,12 +370,12 @@ namespace boost
          // Escape route just in case the answer is zero!
          max_iter -= count;
          max_iter += 1;
-         return a > 0 ? std::make_pair(T(0), T(a)) : std::make_pair(T(a), T(0));
+         return a > 0 ? boost::math::make_pair(T(0), T(a)) : boost::math::make_pair(T(a), T(0));
       }
       if(count == 0)
       {
         a = policies::raise_evaluation_error(function, "Unable to bracket root, last nearest value was %1%", a, pol); // LCOV_EXCL_LINE
-        return std::make_pair(a, b);
+        return boost::math::make_pair(a, b);
       }
      //
      // Heuristic: every 20 iterations we double the growth factor in case the
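// ---------------------------------------------------------------------------
// Editorial illustration (not part of the patch): the shape of the bracketing
// loop above -- march one endpoint geometrically until the function changes
// sign, doubling the growth factor every 20 probes so that hard cases cannot
// stall.  A minimal standalone analogue (all names here are invented):
#include <cmath>
#include <cstdio>

int main()
{
    auto f = [](double x) { return std::log(x) + 5; }; // sign change at exp(-5)
    double factor = 2;
    double a = 0.25, b = a;
    double fa = f(a);
    for (int step = 1; fa > 0; ++step) {
        if (step % 20 == 0) factor *= 2; // the "every 20 iterations" heuristic
        b = a;        // previous endpoint becomes the far side of the bracket
        a /= factor;  // grow the bracket downward toward zero
        fa = f(a);
    }
    std::printf("bracket: [%g, %g]\n", a, b); // contains exp(-5) ~= 6.7e-3
}
// ---------------------------------------------------------------------------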
a : b), @@ -406,9 +411,9 @@ namespace boost } template - RealType nc_beta_quantile(const non_central_beta_distribution& dist, const RealType& p, bool comp) + BOOST_MATH_GPU_ENABLED RealType nc_beta_quantile(const non_central_beta_distribution& dist, const RealType& p, bool comp) { - static const char* function = "quantile(non_central_beta_distribution<%1%>, %1%)"; + constexpr auto function = "quantile(non_central_beta_distribution<%1%>, %1%)"; typedef typename policies::evaluation::type value_type; typedef typename policies::normalise< Policy, @@ -505,9 +510,9 @@ namespace boost detail::nc_beta_quantile_functor f(non_central_beta_distribution(a, b, l), p, comp); tools::eps_tolerance tol(policies::digits()); - std::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); - std::pair ir + boost::math::pair ir = bracket_and_solve_root_01( f, guess, value_type(2.5), true, tol, max_iter, Policy()); @@ -530,7 +535,7 @@ namespace boost } template - T non_central_beta_pdf(T a, T b, T lam, T x, T y, const Policy& pol) + BOOST_MATH_GPU_ENABLED T non_central_beta_pdf(T a, T b, T lam, T x, T y, const Policy& pol) { BOOST_MATH_STD_USING // @@ -541,7 +546,7 @@ namespace boost // // Variables come first: // - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); T errtol = boost::math::policies::get_epsilon(); T l2 = lam / 2; // @@ -580,7 +585,7 @@ namespace boost // // Stable backwards recursion first: // - std::uintmax_t count = k; + boost::math::uintmax_t count = k; T ratio = 0; T old_ratio = 0; for(auto i = k; i >= 0; --i) @@ -615,7 +620,7 @@ namespace boost break; } old_ratio = ratio; - if(static_cast(count + i - k) > max_iter) + if(static_cast(count + i - k) > max_iter) { return policies::raise_evaluation_error("pdf(non_central_beta_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE } @@ -624,10 +629,10 @@ namespace boost } template - RealType nc_beta_pdf(const non_central_beta_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED RealType nc_beta_pdf(const non_central_beta_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING - static const char* function = "pdf(non_central_beta_distribution<%1%>, %1%)"; + constexpr auto function = "pdf(non_central_beta_distribution<%1%>, %1%)"; typedef typename policies::evaluation::type value_type; typedef typename policies::normalise< Policy, @@ -672,8 +677,8 @@ namespace boost struct hypergeometric_2F2_sum { typedef T result_type; - hypergeometric_2F2_sum(T a1_, T a2_, T b1_, T b2_, T z_) : a1(a1_), a2(a2_), b1(b1_), b2(b2_), z(z_), term(1), k(0) {} - T operator()() + BOOST_MATH_GPU_ENABLED hypergeometric_2F2_sum(T a1_, T a2_, T b1_, T b2_, T z_) : a1(a1_), a2(a2_), b1(b1_), b2(b2_), z(z_), term(1), k(0) {} + BOOST_MATH_GPU_ENABLED T operator()() { T result = term; term *= a1 * a2 / (b1 * b2); @@ -690,14 +695,14 @@ namespace boost }; template - T hypergeometric_2F2(T a1, T a2, T b1, T b2, T z, const Policy& pol) + BOOST_MATH_GPU_ENABLED T hypergeometric_2F2(T a1, T a2, T b1, T b2, T z, const Policy& pol) { typedef typename policies::evaluation::type value_type; const char* function = "boost::math::detail::hypergeometric_2F2<%1%>(%1%,%1%,%1%,%1%,%1%)"; hypergeometric_2F2_sum s(a1, a2, b1, b2, z); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = 
policies::get_max_series_iterations(); value_type result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter); @@ -714,7 +719,7 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - non_central_beta_distribution(RealType a_, RealType b_, RealType lambda) : a(a_), b(b_), ncp(lambda) + BOOST_MATH_GPU_ENABLED non_central_beta_distribution(RealType a_, RealType b_, RealType lambda) : a(a_), b(b_), ncp(lambda) { const char* function = "boost::math::non_central_beta_distribution<%1%>::non_central_beta_distribution(%1%,%1%)"; RealType r; @@ -731,15 +736,15 @@ namespace boost Policy()); } // non_central_beta_distribution constructor. - RealType alpha() const + BOOST_MATH_GPU_ENABLED RealType alpha() const { // Private data getter function. return a; } - RealType beta() const + BOOST_MATH_GPU_ENABLED RealType beta() const { // Private data getter function. return b; } - RealType non_centrality() const + BOOST_MATH_GPU_ENABLED RealType non_centrality() const { // Private data getter function. return ncp; } @@ -760,24 +765,24 @@ namespace boost // Non-member functions to give properties of the distribution. template - inline const std::pair range(const non_central_beta_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const non_central_beta_distribution& /* dist */) { // Range of permissible values for random variable k. using boost::math::tools::max_value; - return std::pair(static_cast(0), static_cast(1)); + return boost::math::pair(static_cast(0), static_cast(1)); } template - inline const std::pair support(const non_central_beta_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const non_central_beta_distribution& /* dist */) { // Range of supported values for random variable k. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. using boost::math::tools::max_value; - return std::pair(static_cast(0), static_cast(1)); + return boost::math::pair(static_cast(0), static_cast(1)); } template - inline RealType mode(const non_central_beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const non_central_beta_distribution& dist) { // mode. - static const char* function = "mode(non_central_beta_distribution<%1%> const&)"; + constexpr auto function = "mode(non_central_beta_distribution<%1%> const&)"; RealType a = dist.alpha(); RealType b = dist.beta(); @@ -812,7 +817,7 @@ namespace boost // later: // template - inline RealType mean(const non_central_beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const non_central_beta_distribution& dist) { BOOST_MATH_STD_USING RealType a = dist.alpha(); @@ -823,7 +828,7 @@ namespace boost } // mean template - inline RealType variance(const non_central_beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const non_central_beta_distribution& dist) { // // Relative error of this function may be arbitrarily large... absolute @@ -843,41 +848,41 @@ namespace boost // RealType standard_deviation(const non_central_beta_distribution& dist) // standard_deviation provided by derived accessors. template - inline RealType skewness(const non_central_beta_distribution& /*dist*/) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const non_central_beta_distribution& /*dist*/) { // skewness = sqrt(l). 
const char* function = "boost::math::non_central_beta_distribution<%1%>::skewness()"; typedef typename Policy::assert_undefined_type assert_type; - static_assert(assert_type::value == 0, "Assert type is undefined."); + static_assert(assert_type::value == 0, "The Non Central Beta Distribution has no skewness."); return policies::raise_evaluation_error(function, "This function is not yet implemented, the only sensible result is %1%.", // LCOV_EXCL_LINE - std::numeric_limits::quiet_NaN(), Policy()); // infinity? LCOV_EXCL_LINE + boost::math::numeric_limits::quiet_NaN(), Policy()); // infinity? LCOV_EXCL_LINE } template - inline RealType kurtosis_excess(const non_central_beta_distribution& /*dist*/) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const non_central_beta_distribution& /*dist*/) { const char* function = "boost::math::non_central_beta_distribution<%1%>::kurtosis_excess()"; typedef typename Policy::assert_undefined_type assert_type; - static_assert(assert_type::value == 0, "Assert type is undefined."); + static_assert(assert_type::value == 0, "The Non Central Beta Distribution has no kurtosis excess."); return policies::raise_evaluation_error(function, "This function is not yet implemented, the only sensible result is %1%.", // LCOV_EXCL_LINE - std::numeric_limits::quiet_NaN(), Policy()); // infinity? LCOV_EXCL_LINE + boost::math::numeric_limits::quiet_NaN(), Policy()); // infinity? LCOV_EXCL_LINE } // kurtosis_excess template - inline RealType kurtosis(const non_central_beta_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const non_central_beta_distribution& dist) { return kurtosis_excess(dist) + 3; } template - inline RealType pdf(const non_central_beta_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType pdf(const non_central_beta_distribution& dist, const RealType& x) { // Probability Density/Mass Function. return detail::nc_beta_pdf(dist, x); } // pdf template - RealType cdf(const non_central_beta_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED RealType cdf(const non_central_beta_distribution& dist, const RealType& x) { const char* function = "boost::math::non_central_beta_distribution<%1%>::cdf(%1%)"; RealType a = dist.alpha(); @@ -912,7 +917,7 @@ namespace boost } // cdf template - RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { // Complemented Cumulative Distribution Function const char* function = "boost::math::non_central_beta_distribution<%1%>::cdf(%1%)"; non_central_beta_distribution const& dist = c.dist; @@ -949,13 +954,13 @@ namespace boost } // ccdf template - inline RealType quantile(const non_central_beta_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const non_central_beta_distribution& dist, const RealType& p) { // Quantile (or Percent Point) function. return detail::nc_beta_quantile(dist, p, false); } // quantile template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { // Quantile (or Percent Point) function. return detail::nc_beta_quantile(c.dist, c.param, true); } // quantile complement. 
diff --git a/include/boost/math/distributions/non_central_chi_squared.hpp b/include/boost/math/distributions/non_central_chi_squared.hpp
index f59be9932c..5917b3732d 100644
--- a/include/boost/math/distributions/non_central_chi_squared.hpp
+++ b/include/boost/math/distributions/non_central_chi_squared.hpp
@@ -1,7 +1,7 @@
 // boost\math\distributions\non_central_chi_squared.hpp
 
 // Copyright John Maddock 2008.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -10,6 +10,10 @@
 #ifndef BOOST_MATH_SPECIAL_NON_CENTRAL_CHI_SQUARE_HPP
 #define BOOST_MATH_SPECIAL_NON_CENTRAL_CHI_SQUARE_HPP
 
+#include
+#include
+#include
+#include
 #include
 #include // for incomplete gamma. gamma_q
 #include // for cyl_bessel_i
@@ -21,6 +25,7 @@
 #include // for root finding.
 #include
 #include
+#include
 
 namespace boost
 {
@@ -33,7 +38,7 @@ namespace boost
      namespace detail{

         template <class T, class Policy>
-         T non_central_chi_square_q(T x, T f, T theta, const Policy& pol, T init_sum = 0)
+         BOOST_MATH_GPU_ENABLED T non_central_chi_square_q(T x, T f, T theta, const Policy& pol, T init_sum = 0)
         {
            //
            // Computes the complement of the Non-Central Chi-Square
@@ -62,7 +67,7 @@ namespace boost
            T lambda = theta / 2;
            T del = f / 2;
            T y = x / 2;
-            std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+            boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
            T errtol = boost::math::policies::get_epsilon<T, Policy>();
            T sum = init_sum;
            //
@@ -89,7 +94,7 @@ namespace boost
            // recurrences:
            //
            long long i;
-            for(i = k; static_cast<std::uintmax_t>(i-k) < max_iter; ++i)
+            for(i = k; static_cast<boost::math::uintmax_t>(i-k) < max_iter; ++i)
            {
               T term = poisf * gamf;
               sum += term;
@@ -100,7 +105,7 @@ namespace boost
                  break;
            }
            //Error check:
-            if(static_cast<std::uintmax_t>(i-k) >= max_iter)
+            if(static_cast<boost::math::uintmax_t>(i-k) >= max_iter)
               return policies::raise_evaluation_error("cdf(non_central_chi_squared_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
            //
            // Now backwards iteration: the gamma
@@ -126,7 +131,7 @@ namespace boost
         }

         template <class T, class Policy>
-         T non_central_chi_square_p_ding(T x, T f, T theta, const Policy& pol, T init_sum = 0)
+         BOOST_MATH_GPU_ENABLED T non_central_chi_square_p_ding(T x, T f, T theta, const Policy& pol, T init_sum = 0)
         {
            //
            // This is an implementation of:
@@ -155,12 +160,12 @@ namespace boost
            if(sum == 0)
               return sum;
-            std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+            boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
            T errtol = boost::math::policies::get_epsilon<T, Policy>();

            int i;
            T lterm(0), term(0);
-            for(i = 1; static_cast<std::uintmax_t>(i) < max_iter; ++i)
+            for(i = 1; static_cast<boost::math::uintmax_t>(i) < max_iter; ++i)
            {
               tk = tk * x / (f + 2 * i);
               uk = uk * lambda / i;
@@ -172,14 +177,14 @@ namespace boost
                  break;
            }
            //Error check:
-            if(static_cast<std::uintmax_t>(i) >= max_iter)
+            if(static_cast<boost::math::uintmax_t>(i) >= max_iter)
               return policies::raise_evaluation_error("cdf(non_central_chi_squared_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
            return sum;
         }

         template <class T, class Policy>
-         T non_central_chi_square_p(T y, T n, T lambda, const Policy& pol, T init_sum)
+         BOOST_MATH_GPU_ENABLED T non_central_chi_square_p(T y, T n, T lambda, const Policy& pol, T init_sum)
         {
            //
            // This is taken more or less directly from:
@@ -198,7 +203,7 @@ namespace boost
            // Special case:
            if(y == 0)
               return 0;
-            std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+            boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
            T errtol = boost::math::policies::get_epsilon<T, Policy>();
            T errorf(0), errorb(0);
@@ -266,23 +271,23 @@ namespace boost
               errorf = poiskf * gamkf;
               sum += errorf;
               ++i;
-            }while((fabs(errorf / sum) > errtol) && (static_cast<std::uintmax_t>(i) < max_iter));
+            }while((fabs(errorf / sum) > errtol) && (static_cast<boost::math::uintmax_t>(i) < max_iter));

            //Error check:
-            if(static_cast<std::uintmax_t>(i) >= max_iter)
+            if(static_cast<boost::math::uintmax_t>(i) >= max_iter)
               return policies::raise_evaluation_error("cdf(non_central_chi_squared_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE

            return sum;
         }

         template <class T, class Policy>
-         T non_central_chi_square_pdf(T x, T n, T lambda, const Policy& pol)
+         BOOST_MATH_GPU_ENABLED T non_central_chi_square_pdf(T x, T n, T lambda, const Policy& pol)
         {
            //
            // As above but for the PDF:
            //
            BOOST_MATH_STD_USING
-            std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+            boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
            T errtol = boost::math::policies::get_epsilon<T, Policy>();
            T x2 = x / 2;
            T n2 = n / 2;
@@ -298,7 +303,7 @@ namespace boost
               sum += pois;
               if(pois / sum < errtol)
                  break;
-               if(static_cast<std::uintmax_t>(i - k) >= max_iter)
+               if(static_cast<boost::math::uintmax_t>(i - k) >= max_iter)
                  return policies::raise_evaluation_error("pdf(non_central_chi_squared_distribution<%1%>, %1%)", "Series did not converge, closest value was %1%", sum, pol); // LCOV_EXCL_LINE
               pois *= l2 * x2 / ((i + 1) * (n2 + i));
            }
@@ -313,7 +318,7 @@ namespace boost
         }

         template <class RealType, class Policy>
-         inline RealType non_central_chi_squared_cdf(RealType x, RealType k, RealType l, bool invert, const Policy&)
+         BOOST_MATH_GPU_ENABLED inline RealType non_central_chi_squared_cdf(RealType x, RealType k, RealType l, bool invert, const Policy&)
         {
            typedef typename policies::evaluation<RealType, Policy>::type value_type;
            typedef typename policies::normalise<
@@ -373,10 +378,10 @@ namespace boost
         template <class T, class Policy>
         struct nccs_quantile_functor
         {
-            nccs_quantile_functor(const non_central_chi_squared_distribution<T, Policy>& d, T t, bool c)
+            BOOST_MATH_GPU_ENABLED nccs_quantile_functor(const non_central_chi_squared_distribution<T, Policy>& d, T t, bool c)
               : dist(d), target(t), comp(c) {}

-            T operator()(const T& x)
+            BOOST_MATH_GPU_ENABLED T operator()(const T& x)
            {
               return comp ?
                  target - cdf(complement(dist, x))
@@ -390,10 +395,10 @@ namespace boost
         };

         template <class RealType, class Policy>
-         RealType nccs_quantile(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& p, bool comp)
+         BOOST_MATH_GPU_ENABLED RealType nccs_quantile(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& p, bool comp)
         {
            BOOST_MATH_STD_USING
-            static const char* function = "quantile(non_central_chi_squared_distribution<%1%>, %1%)";
+            constexpr auto function = "quantile(non_central_chi_squared_distribution<%1%>, %1%)";
            typedef typename policies::evaluation<RealType, Policy>::type value_type;
            typedef typename policies::normalise<
               Policy,
@@ -481,10 +486,10 @@ namespace boost
         }

         template <class RealType, class Policy>
-         RealType nccs_pdf(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
+         BOOST_MATH_GPU_ENABLED RealType nccs_pdf(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
         {
            BOOST_MATH_STD_USING
-            static const char* function = "pdf(non_central_chi_squared_distribution<%1%>, %1%)";
+            constexpr auto function = "pdf(non_central_chi_squared_distribution<%1%>, %1%)";
            typedef typename policies::evaluation<RealType, Policy>::type value_type;
            typedef typename policies::normalise<
               Policy,
@@ -545,11 +550,11 @@ namespace boost
         template <class RealType, class Policy>
         struct degrees_of_freedom_finder
         {
-            degrees_of_freedom_finder(
+            BOOST_MATH_GPU_ENABLED degrees_of_freedom_finder(
               RealType lam_, RealType x_, RealType p_, bool c)
               : lam(lam_), x(x_), p(p_), comp(c) {}

-            RealType operator()(const RealType& v)
+            BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& v)
            {
               non_central_chi_squared_distribution<RealType, Policy> d(v, lam);
               return comp ?
@@ -564,21 +569,21 @@ namespace boost
         };

         template <class RealType, class Policy>
-         inline RealType find_degrees_of_freedom(
+         BOOST_MATH_GPU_ENABLED inline RealType find_degrees_of_freedom(
            RealType lam, RealType x, RealType p, RealType q, const Policy& pol)
         {
-            const char* function = "non_central_chi_squared<%1%>::find_degrees_of_freedom";
+            constexpr auto function = "non_central_chi_squared<%1%>::find_degrees_of_freedom";
            if((p == 0) || (q == 0))
            {
               //
               // Can't do a thing if one of p and q is zero:
               //
               return policies::raise_evaluation_error(function, "Can't find degrees of freedom when the probability is 0 or 1, only possible answer is %1%", // LCOV_EXCL_LINE
-                  RealType(std::numeric_limits<RealType>::quiet_NaN()), Policy()); // LCOV_EXCL_LINE
+                  RealType(boost::math::numeric_limits<RealType>::quiet_NaN()), Policy()); // LCOV_EXCL_LINE
            }
            degrees_of_freedom_finder<RealType, Policy> f(lam, x, p < q ? p : q, p < q ? false : true);
            tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>());
-            std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+            boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
            //
            // Pick an initial guess that we know will give us a probability
            // right around 0.5.
@@ -586,7 +591,7 @@ namespace boost
            RealType guess = x - lam;
            if(guess < 1)
               guess = 1;
-            std::pair<RealType, RealType> ir = tools::bracket_and_solve_root(
+            boost::math::pair<RealType, RealType> ir = tools::bracket_and_solve_root(
               f, guess, RealType(2), false, tol, max_iter, pol);
            RealType result = ir.first + (ir.second - ir.first) / 2;
            if(max_iter >= policies::get_max_root_iterations<Policy>())
@@ -600,11 +605,11 @@ namespace boost
         template <class RealType, class Policy>
         struct non_centrality_finder
         {
-            non_centrality_finder(
+            BOOST_MATH_GPU_ENABLED non_centrality_finder(
               RealType v_, RealType x_, RealType p_, bool c)
               : v(v_), x(x_), p(p_), comp(c) {}

-            RealType operator()(const RealType& lam)
+            BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& lam)
            {
               non_central_chi_squared_distribution<RealType, Policy> d(v, lam);
               return comp ?
@@ -619,21 +624,21 @@ namespace boost
         };

         template <class RealType, class Policy>
-         inline RealType find_non_centrality(
+         BOOST_MATH_GPU_ENABLED inline RealType find_non_centrality(
            RealType v, RealType x, RealType p, RealType q, const Policy& pol)
         {
-            const char* function = "non_central_chi_squared<%1%>::find_non_centrality";
+            constexpr auto function = "non_central_chi_squared<%1%>::find_non_centrality";
            if((p == 0) || (q == 0))
            {
               //
               // Can't do a thing if one of p and q is zero:
               //
               return policies::raise_evaluation_error(function, "Can't find non centrality parameter when the probability is 0 or 1, only possible answer is %1%", // LCOV_EXCL_LINE
-                  RealType(std::numeric_limits<RealType>::quiet_NaN()), Policy()); // LCOV_EXCL_LINE
+                  RealType(boost::math::numeric_limits<RealType>::quiet_NaN()), Policy()); // LCOV_EXCL_LINE
            }
            non_centrality_finder<RealType, Policy> f(v, x, p < q ? p : q, p < q ? false : true);
            tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>());
-            std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+            boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
            //
            // Pick an initial guess that we know will give us a probability
            // right around 0.5.
@@ -641,7 +646,7 @@ namespace boost
            RealType guess = x - v;
            if(guess < 1)
               guess = 1;
-            std::pair<RealType, RealType> ir = tools::bracket_and_solve_root(
+            boost::math::pair<RealType, RealType> ir = tools::bracket_and_solve_root(
               f, guess, RealType(2), false, tol, max_iter, pol);
            RealType result = ir.first + (ir.second - ir.first) / 2;
            if(max_iter >= policies::get_max_root_iterations<Policy>())
@@ -661,9 +666,9 @@ namespace boost
         typedef RealType value_type;
         typedef Policy policy_type;

-         non_central_chi_squared_distribution(RealType df_, RealType lambda) : df(df_), ncp(lambda)
+         BOOST_MATH_GPU_ENABLED non_central_chi_squared_distribution(RealType df_, RealType lambda) : df(df_), ncp(lambda)
         {
-            const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::non_central_chi_squared_distribution(%1%,%1%)";
+            constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::non_central_chi_squared_distribution(%1%,%1%)";
            RealType r;
            detail::check_df(
               function,
@@ -675,17 +680,17 @@ namespace boost
               Policy());
         } // non_central_chi_squared_distribution constructor.

-         RealType degrees_of_freedom() const
+         BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom() const
         { // Private data getter function.
            return df;
         }
-         RealType non_centrality() const
+         BOOST_MATH_GPU_ENABLED RealType non_centrality() const
         { // Private data getter function.
            return ncp;
         }
-         static RealType find_degrees_of_freedom(RealType lam, RealType x, RealType p)
+         BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(RealType lam, RealType x, RealType p)
         {
-            const char* function = "non_central_chi_squared<%1%>::find_degrees_of_freedom";
+            constexpr auto function = "non_central_chi_squared<%1%>::find_degrees_of_freedom";
            typedef typename policies::evaluation<RealType, Policy>::type eval_type;
            typedef typename policies::normalise<
               Policy,
@@ -704,9 +709,9 @@ namespace boost
               function);
         }
         template <class A, class B, class C>
-         static RealType find_degrees_of_freedom(const complemented3_type<A, B, C>& c)
+         BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(const complemented3_type<A, B, C>& c)
         {
-            const char* function = "non_central_chi_squared<%1%>::find_degrees_of_freedom";
+            constexpr auto function = "non_central_chi_squared<%1%>::find_degrees_of_freedom";
            typedef typename policies::evaluation<RealType, Policy>::type eval_type;
            typedef typename policies::normalise<
               Policy,
@@ -724,9 +729,9 @@ namespace boost
               result,
               function);
         }
-         static RealType find_non_centrality(RealType v, RealType x, RealType p)
+         BOOST_MATH_GPU_ENABLED static RealType find_non_centrality(RealType v, RealType x, RealType p)
         {
-            const char* function = "non_central_chi_squared<%1%>::find_non_centrality";
+            constexpr auto function = "non_central_chi_squared<%1%>::find_non_centrality";
            typedef typename policies::evaluation<RealType, Policy>::type eval_type;
            typedef typename policies::normalise<
               Policy,
@@ -745,9 +750,9 @@ namespace boost
               function);
         }
         template <class A, class B, class C>
-         static RealType find_non_centrality(const complemented3_type<A, B, C>& c)
+         BOOST_MATH_GPU_ENABLED static RealType find_non_centrality(const complemented3_type<A, B, C>& c)
         {
-            const char* function = "non_central_chi_squared<%1%>::find_non_centrality";
+            constexpr auto function = "non_central_chi_squared<%1%>::find_non_centrality";
            typedef typename policies::evaluation<RealType, Policy>::type eval_type;
            typedef typename policies::normalise<
               Policy,
@@ -781,24 +786,24 @@ namespace boost
      // Non-member functions to give properties of the distribution.

      template <class RealType, class Policy>
-      inline const std::pair<RealType, RealType> range(const non_central_chi_squared_distribution<RealType, Policy>& /* dist */)
+      BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const non_central_chi_squared_distribution<RealType, Policy>& /* dist */)
      { // Range of permissible values for random variable k.
         using boost::math::tools::max_value;
-         return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // Max integer?
+         return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // Max integer?
      }

      template <class RealType, class Policy>
-      inline const std::pair<RealType, RealType> support(const non_central_chi_squared_distribution<RealType, Policy>& /* dist */)
+      BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const non_central_chi_squared_distribution<RealType, Policy>& /* dist */)
      { // Range of supported values for random variable k.
        // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
         using boost::math::tools::max_value;
-         return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+         return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
      }

      template <class RealType, class Policy>
-      inline RealType mean(const non_central_chi_squared_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType mean(const non_central_chi_squared_distribution<RealType, Policy>& dist)
      { // Mean of poisson distribution = lambda.
-         const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::mean()";
+         constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::mean()";
         RealType k = dist.degrees_of_freedom();
         RealType l = dist.non_centrality();
         RealType r;
@@ -816,9 +821,9 @@ namespace boost
      } // mean

      template <class RealType, class Policy>
-      inline RealType mode(const non_central_chi_squared_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType mode(const non_central_chi_squared_distribution<RealType, Policy>& dist)
      { // mode.
-         static const char* function = "mode(non_central_chi_squared_distribution<%1%> const&)";
+         constexpr auto function = "mode(non_central_chi_squared_distribution<%1%> const&)";
         RealType k = dist.degrees_of_freedom();
         RealType l = dist.non_centrality();
@@ -839,9 +844,9 @@ namespace boost
      }

      template <class RealType, class Policy>
-      inline RealType variance(const non_central_chi_squared_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType variance(const non_central_chi_squared_distribution<RealType, Policy>& dist)
      { // variance.
-         const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::variance()";
+         constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::variance()";
         RealType k = dist.degrees_of_freedom();
         RealType l = dist.non_centrality();
         RealType r;
@@ -862,9 +867,9 @@ namespace boost
      // standard_deviation provided by derived accessors.

      template <class RealType, class Policy>
-      inline RealType skewness(const non_central_chi_squared_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType skewness(const non_central_chi_squared_distribution<RealType, Policy>& dist)
      { // skewness = sqrt(l).
-         const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::skewness()";
+         constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::skewness()";
         RealType k = dist.degrees_of_freedom();
         RealType l = dist.non_centrality();
         RealType r;
@@ -883,9 +888,9 @@ namespace boost
      }

      template <class RealType, class Policy>
-      inline RealType kurtosis_excess(const non_central_chi_squared_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const non_central_chi_squared_distribution<RealType, Policy>& dist)
      {
-         const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::kurtosis_excess()";
+         constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::kurtosis_excess()";
         RealType k = dist.degrees_of_freedom();
         RealType l = dist.non_centrality();
         RealType r;
@@ -903,21 +908,21 @@ namespace boost
      } // kurtosis_excess

      template <class RealType, class Policy>
-      inline RealType kurtosis(const non_central_chi_squared_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const non_central_chi_squared_distribution<RealType, Policy>& dist)
      {
         return kurtosis_excess(dist) + 3;
      }

      template <class RealType, class Policy>
-      inline RealType pdf(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
+      BOOST_MATH_GPU_ENABLED inline RealType pdf(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
      { // Probability Density/Mass Function.
         return detail::nccs_pdf(dist, x);
      } // pdf

      template <class RealType, class Policy>
-      RealType cdf(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
+      BOOST_MATH_GPU_ENABLED RealType cdf(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& x)
      {
-         const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::cdf(%1%)";
+         constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::cdf(%1%)";
         RealType k = dist.degrees_of_freedom();
         RealType l = dist.non_centrality();
         RealType r;
@@ -942,9 +947,9 @@ namespace boost
      } // cdf

      template <class RealType, class Policy>
-      RealType cdf(const complemented2_type<non_central_chi_squared_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED RealType cdf(const complemented2_type<non_central_chi_squared_distribution<RealType, Policy>, RealType>& c)
      { // Complemented Cumulative Distribution Function
-         const char* function = "boost::math::non_central_chi_squared_distribution<%1%>::cdf(%1%)";
+         constexpr auto function = "boost::math::non_central_chi_squared_distribution<%1%>::cdf(%1%)";
         non_central_chi_squared_distribution<RealType, Policy> const& dist = c.dist;
         RealType x = c.param;
         RealType k = dist.degrees_of_freedom();
@@ -971,13 +976,13 @@ namespace boost
      } // ccdf

      template <class RealType, class Policy>
-      inline RealType quantile(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& p)
+      BOOST_MATH_GPU_ENABLED inline RealType quantile(const non_central_chi_squared_distribution<RealType, Policy>& dist, const RealType& p)
      { // Quantile (or Percent Point) function.
         return detail::nccs_quantile(dist, p, false);
      } // quantile

      template <class RealType, class Policy>
-      inline RealType quantile(const complemented2_type<non_central_chi_squared_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<non_central_chi_squared_distribution<RealType, Policy>, RealType>& c)
      { // Quantile (or Percent Point) function.
         return detail::nccs_quantile(c.dist, c.param, true);
      } // quantile complement.
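A minimal sketch of the static finders whose hunks appear above; it is illustrative only (not part of the patch), assumes Boost.Math is on the include path, and the numeric inputs are arbitrary values for which the bracketing root-finder should converge:

// Illustrative use of find_degrees_of_freedom / find_non_centrality.
#include <boost/math/distributions/non_central_chi_squared.hpp>
#include <iostream>

int main()
{
    using dist_t = boost::math::non_central_chi_squared_distribution<double>;

    // Find nu such that cdf(non_central_chi_squared(nu, lambda = 5), 20) == 0.95.
    double nu = dist_t::find_degrees_of_freedom(5.0, 20.0, 0.95);

    // Conversely, fix nu and solve for the non-centrality parameter.
    double lambda = dist_t::find_non_centrality(nu, 20.0, 0.95);

    std::cout << "nu ~= " << nu << ", lambda ~= " << lambda << '\n';
    std::cout << "check: " << cdf(dist_t(nu, lambda), 20.0) << '\n'; // ~0.95
}

Both finders drive tools::bracket_and_solve_root, which is why their max_iter bookkeeping switches to boost::math::uintmax_t in this patch.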
diff --git a/include/boost/math/distributions/non_central_f.hpp b/include/boost/math/distributions/non_central_f.hpp
index e93d03e597..dedd437144 100644
--- a/include/boost/math/distributions/non_central_f.hpp
+++ b/include/boost/math/distributions/non_central_f.hpp
@@ -1,7 +1,7 @@
 // boost\math\distributions\non_central_f.hpp
 
 // Copyright John Maddock 2008.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -10,9 +10,13 @@
 #ifndef BOOST_MATH_SPECIAL_NON_CENTRAL_F_HPP
 #define BOOST_MATH_SPECIAL_NON_CENTRAL_F_HPP
 
+#include
+#include
+#include
 #include
 #include
 #include
+#include
 
 namespace boost
 {
@@ -25,9 +29,9 @@ namespace boost
         typedef RealType value_type;
         typedef Policy policy_type;

-         non_central_f_distribution(RealType v1_, RealType v2_, RealType lambda) : v1(v1_), v2(v2_), ncp(lambda)
+         BOOST_MATH_GPU_ENABLED non_central_f_distribution(RealType v1_, RealType v2_, RealType lambda) : v1(v1_), v2(v2_), ncp(lambda)
         {
-            const char* function = "boost::math::non_central_f_distribution<%1%>::non_central_f_distribution(%1%,%1%)";
+            constexpr auto function = "boost::math::non_central_f_distribution<%1%>::non_central_f_distribution(%1%,%1%)";
            RealType r;
            detail::check_df(
               function,
@@ -42,15 +46,15 @@ namespace boost
               Policy());
         } // non_central_f_distribution constructor.

-         RealType degrees_of_freedom1()const
+         BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom1()const
         {
            return v1;
         }
-         RealType degrees_of_freedom2()const
+         BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom2()const
         {
            return v2;
         }
-         RealType non_centrality() const
+         BOOST_MATH_GPU_ENABLED RealType non_centrality() const
         { // Private data getter function.
            return ncp;
         }
@@ -71,24 +75,24 @@ namespace boost
      // Non-member functions to give properties of the distribution.

      template <class RealType, class Policy>
-      inline const std::pair<RealType, RealType> range(const non_central_f_distribution<RealType, Policy>& /* dist */)
+      BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const non_central_f_distribution<RealType, Policy>& /* dist */)
      { // Range of permissible values for random variable k.
         using boost::math::tools::max_value;
-         return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+         return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
      }

      template <class RealType, class Policy>
-      inline const std::pair<RealType, RealType> support(const non_central_f_distribution<RealType, Policy>& /* dist */)
+      BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const non_central_f_distribution<RealType, Policy>& /* dist */)
      { // Range of supported values for random variable k.
        // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
         using boost::math::tools::max_value;
-         return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+         return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
      }

      template <class RealType, class Policy>
-      inline RealType mean(const non_central_f_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType mean(const non_central_f_distribution<RealType, Policy>& dist)
      {
-         const char* function = "mean(non_central_f_distribution<%1%> const&)";
+         constexpr auto function = "mean(non_central_f_distribution<%1%> const&)";
         RealType v1 = dist.degrees_of_freedom1();
         RealType v2 = dist.degrees_of_freedom2();
         RealType l = dist.non_centrality();
@@ -116,9 +120,9 @@ namespace boost
      } // mean

      template <class RealType, class Policy>
-      inline RealType mode(const non_central_f_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType mode(const non_central_f_distribution<RealType, Policy>& dist)
      { // mode.
-         static const char* function = "mode(non_central_chi_squared_distribution<%1%> const&)";
+         constexpr auto function = "mode(non_central_chi_squared_distribution<%1%> const&)";

         RealType n = dist.degrees_of_freedom1();
         RealType m = dist.degrees_of_freedom2();
@@ -146,9 +150,9 @@ namespace boost
      }

      template <class RealType, class Policy>
-      inline RealType variance(const non_central_f_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType variance(const non_central_f_distribution<RealType, Policy>& dist)
      { // variance.
-         const char* function = "variance(non_central_f_distribution<%1%> const&)";
+         constexpr auto function = "variance(non_central_f_distribution<%1%> const&)";
         RealType n = dist.degrees_of_freedom1();
         RealType m = dist.degrees_of_freedom2();
         RealType l = dist.non_centrality();
@@ -182,9 +186,9 @@ namespace boost
      // standard_deviation provided by derived accessors.

      template <class RealType, class Policy>
-      inline RealType skewness(const non_central_f_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType skewness(const non_central_f_distribution<RealType, Policy>& dist)
      { // skewness = sqrt(l).
-         const char* function = "skewness(non_central_f_distribution<%1%> const&)";
+         constexpr auto function = "skewness(non_central_f_distribution<%1%> const&)";
         BOOST_MATH_STD_USING
         RealType n = dist.degrees_of_freedom1();
         RealType m = dist.degrees_of_freedom2();
@@ -219,9 +223,9 @@ namespace boost
      }

      template <class RealType, class Policy>
-      inline RealType kurtosis_excess(const non_central_f_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const non_central_f_distribution<RealType, Policy>& dist)
      {
-         const char* function = "kurtosis_excess(non_central_f_distribution<%1%> const&)";
+         constexpr auto function = "kurtosis_excess(non_central_f_distribution<%1%> const&)";
         BOOST_MATH_STD_USING
         RealType n = dist.degrees_of_freedom1();
         RealType m = dist.degrees_of_freedom2();
@@ -266,13 +270,13 @@ namespace boost
      } // kurtosis_excess

      template <class RealType, class Policy>
-      inline RealType kurtosis(const non_central_f_distribution<RealType, Policy>& dist)
+      BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const non_central_f_distribution<RealType, Policy>& dist)
      {
         return kurtosis_excess(dist) + 3;
      }

      template <class RealType, class Policy>
-      inline RealType pdf(const non_central_f_distribution<RealType, Policy>& dist, const RealType& x)
+      BOOST_MATH_GPU_ENABLED inline RealType pdf(const non_central_f_distribution<RealType, Policy>& dist, const RealType& x)
      { // Probability Density/Mass Function.
         typedef typename policies::evaluation<RealType, Policy>::type value_type;
         typedef typename policies::normalise<
@@ -292,9 +296,9 @@ namespace boost
      } // pdf

      template <class RealType, class Policy>
-      RealType cdf(const non_central_f_distribution<RealType, Policy>& dist, const RealType& x)
+      BOOST_MATH_GPU_ENABLED RealType cdf(const non_central_f_distribution<RealType, Policy>& dist, const RealType& x)
      {
-         const char* function = "cdf(const non_central_f_distribution<%1%>&, %1%)";
+         constexpr auto function = "cdf(const non_central_f_distribution<%1%>&, %1%)";
         RealType r;
         if(!detail::check_df(
            function,
@@ -333,9 +337,9 @@ namespace boost
      } // cdf

      template <class RealType, class Policy>
-      RealType cdf(const complemented2_type<non_central_f_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED RealType cdf(const complemented2_type<non_central_f_distribution<RealType, Policy>, RealType>& c)
      { // Complemented Cumulative Distribution Function
-         const char* function = "cdf(complement(const non_central_f_distribution<%1%>&, %1%))";
+         constexpr auto function = "cdf(complement(const non_central_f_distribution<%1%>&, %1%))";
         RealType r;
         if(!detail::check_df(
            function,
@@ -374,7 +378,7 @@ namespace boost
      } // ccdf

      template <class RealType, class Policy>
-      inline RealType quantile(const non_central_f_distribution<RealType, Policy>& dist, const RealType& p)
+      BOOST_MATH_GPU_ENABLED inline RealType quantile(const non_central_f_distribution<RealType, Policy>& dist, const RealType& p)
      { // Quantile (or Percent Point) function.
         RealType alpha = dist.degrees_of_freedom1() / 2;
         RealType beta = dist.degrees_of_freedom2() / 2;
@@ -388,7 +392,7 @@ namespace boost
      } // quantile

      template <class RealType, class Policy>
-      inline RealType quantile(const complemented2_type<non_central_f_distribution<RealType, Policy>, RealType>& c)
+      BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<non_central_f_distribution<RealType, Policy>, RealType>& c)
      { // Quantile (or Percent Point) function.
         RealType alpha = c.dist.degrees_of_freedom1() / 2;
         RealType beta = c.dist.degrees_of_freedom2() / 2;
diff --git a/include/boost/math/distributions/normal.hpp b/include/boost/math/distributions/normal.hpp
index 70259e62b1..9d973fb539 100644
--- a/include/boost/math/distributions/normal.hpp
+++ b/include/boost/math/distributions/normal.hpp
@@ -1,6 +1,6 @@
 // Copyright John Maddock 2006, 2007.
 // Copyright Paul A. Bristow 2006, 2007.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -15,13 +15,15 @@
 // From MathWorld--A Wolfram Web Resource.
 // http://mathworld.wolfram.com/NormalDistribution.html
 
+#include
+#include
+#include
 #include
 #include // for erf/erfc.
 #include
 #include
-
-#include
-#include
+#include
+#include
 
 namespace boost{ namespace math{
@@ -32,32 +34,32 @@ class normal_distribution
    using value_type = RealType;
    using policy_type = Policy;

-   explicit normal_distribution(RealType l_mean = 0, RealType sd = 1)
+   BOOST_MATH_GPU_ENABLED explicit normal_distribution(RealType l_mean = 0, RealType sd = 1)
      : m_mean(l_mean), m_sd(sd)
    { // Default is a 'standard' normal distribution N01.
-     static const char* function = "boost::math::normal_distribution<%1%>::normal_distribution";
+     constexpr auto function = "boost::math::normal_distribution<%1%>::normal_distribution";
     RealType result;
     detail::check_scale(function, sd, &result, Policy());
     detail::check_location(function, l_mean, &result, Policy());
   }

-   RealType mean()const
+   BOOST_MATH_GPU_ENABLED RealType mean()const
   { // alias for location.
     return m_mean;
   }

-   RealType standard_deviation()const
+   BOOST_MATH_GPU_ENABLED RealType standard_deviation()const
   { // alias for scale.
     return m_sd;
   }

   // Synonyms, provided to allow generic use of find_location and find_scale.
-   RealType location()const
+   BOOST_MATH_GPU_ENABLED RealType location()const
   { // location.
     return m_mean;
   }
-   RealType scale()const
+   BOOST_MATH_GPU_ENABLED RealType scale()const
   { // scale.
     return m_sd;
   }
@@ -92,30 +94,30 @@ normal_distribution(RealType)->normal_distribution<typename boost::math::tools::promote_args<RealType>::type>;

template <class RealType, class Policy>
-inline std::pair<RealType, RealType> range(const normal_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const normal_distribution<RealType, Policy>& /*dist*/)
{ // Range of permissible values for random variable x.
-  if (std::numeric_limits<RealType>::has_infinity)
+  BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
  {
-     return std::pair<RealType, RealType>(-std::numeric_limits<RealType>::infinity(), std::numeric_limits<RealType>::infinity()); // - to + infinity.
+     return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
  }
  else
  { // Can only use max_value.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max value.
+    return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max value.
  }
}

template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const normal_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const normal_distribution<RealType, Policy>& /*dist*/)
{ // This is range values for random variable x where cdf rises from 0 to 1, and outside it, the pdf is zero.
-  if (std::numeric_limits<RealType>::has_infinity)
+  BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
  {
-     return std::pair<RealType, RealType>(-std::numeric_limits<RealType>::infinity(), std::numeric_limits<RealType>::infinity()); // - to + infinity.
+     return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
  }
  else
  { // Can only use max_value.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max value.
+    return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max value.
  }
}
@@ -124,14 +126,14 @@ inline std::pair<RealType, RealType> support(const normal_distribution<RealType, Policy>& /*dist*/)

template <class RealType, class Policy>
-inline RealType pdf(const normal_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const normal_distribution<RealType, Policy>& dist, const RealType& x)
{
   BOOST_MATH_STD_USING  // for ADL of std functions

   RealType sd = dist.standard_deviation();
   RealType mean = dist.mean();

-   static const char* function = "boost::math::pdf(const normal_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const normal_distribution<%1%>&, %1%)";

   RealType result = 0;
   if(false == detail::check_scale(function, sd, &result, Policy()))
@@ -162,16 +164,16 @@ inline RealType pdf(const normal_distribution<RealType, Policy>& dist, const Rea
} // pdf

template <class RealType, class Policy>
-inline RealType logpdf(const normal_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const normal_distribution<RealType, Policy>& dist, const RealType& x)
{
   BOOST_MATH_STD_USING  // for ADL of std functions

   const RealType sd = dist.standard_deviation();
   const RealType mean = dist.mean();

-   static const char* function = "boost::math::logpdf(const normal_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logpdf(const normal_distribution<%1%>&, %1%)";

-   RealType result = -std::numeric_limits<RealType>::infinity();
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();
   if(false == detail::check_scale(function, sd, &result, Policy()))
   {
      return result;
@@ -198,13 +200,13 @@ inline RealType logpdf(const normal_distribution<RealType, Policy>& dist, const
}

template <class RealType, class Policy>
-inline RealType cdf(const normal_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const normal_distribution<RealType, Policy>& dist, const RealType& x)
{
   BOOST_MATH_STD_USING  // for ADL of std functions

   RealType sd = dist.standard_deviation();
   RealType mean = dist.mean();
-   static const char* function = "boost::math::cdf(const normal_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const normal_distribution<%1%>&, %1%)";
   RealType result = 0;
   if(false == detail::check_scale(function, sd, &result, Policy()))
   {
@@ -229,13 +231,13 @@ inline RealType cdf(const normal_distribution<RealType, Policy>& dist, const Rea
} // cdf

template <class RealType, class Policy>
-inline RealType quantile(const normal_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const normal_distribution<RealType, Policy>& dist, const RealType& p)
{
   BOOST_MATH_STD_USING  // for ADL of std functions

   RealType sd = dist.standard_deviation();
   RealType mean = dist.mean();

-   static const char* function = "boost::math::quantile(const normal_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const normal_distribution<%1%>&, %1%)";

   RealType result = 0;
   if(false == detail::check_scale(function, sd, &result, Policy()))
@@ -253,14 +255,14 @@ inline RealType quantile(const normal_distribution<RealType, Policy>& dist, cons
} // quantile

template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<normal_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<normal_distribution<RealType, Policy>, RealType>& c)
{
   BOOST_MATH_STD_USING  // for ADL of std functions

   RealType sd = c.dist.standard_deviation();
   RealType mean = c.dist.mean();
   RealType x = c.param;
-   static const char* function = "boost::math::cdf(const complement(normal_distribution<%1%>&), %1%)";
+   constexpr auto function = "boost::math::cdf(const complement(normal_distribution<%1%>&), %1%)";

   RealType result = 0;
   if(false == detail::check_scale(function, sd, &result, Policy()))
@@ -281,13 +283,13 @@ inline RealType cdf(const complemented2_type<normal_distribution<RealType, Policy>, RealType>& c)

template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<normal_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<normal_distribution<RealType, Policy>, RealType>& c)
{
   BOOST_MATH_STD_USING  // for ADL of std functions

   RealType sd = c.dist.standard_deviation();
   RealType mean = c.dist.mean();
-   static const char* function = "boost::math::quantile(const complement(normal_distribution<%1%>&), %1%)";
+   constexpr auto function = "boost::math::quantile(const complement(normal_distribution<%1%>&), %1%)";
   RealType result = 0;
   if(false == detail::check_scale(function, sd, &result, Policy()))
      return result;
@@ -303,51 +305,51 @@ inline RealType quantile(const complemented2_type<normal_distribution<RealType, Policy>, RealType>& c)

template <class RealType, class Policy>
-inline RealType mean(const normal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const normal_distribution<RealType, Policy>& dist)
{
   return dist.mean();
}

template <class RealType, class Policy>
-inline RealType standard_deviation(const normal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType standard_deviation(const normal_distribution<RealType, Policy>& dist)
{
   return dist.standard_deviation();
}

template <class RealType, class Policy>
-inline RealType mode(const normal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const normal_distribution<RealType, Policy>& dist)
{
   return dist.mean();
}

template <class RealType, class Policy>
-inline RealType median(const normal_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType median(const normal_distribution<RealType, Policy>& dist)
{
   return dist.mean();
}

template <class RealType, class Policy>
-inline RealType skewness(const normal_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const normal_distribution<RealType, Policy>& /*dist*/)
{
   return 0;
}

template <class RealType, class Policy>
-inline RealType kurtosis(const normal_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const normal_distribution<RealType, Policy>& /*dist*/)
{
   return 3;
}

template <class RealType, class Policy>
-inline RealType kurtosis_excess(const normal_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const normal_distribution<RealType, Policy>& /*dist*/)
{
   return 0;
}

template <class RealType, class Policy>
-inline RealType entropy(const normal_distribution<RealType, Policy> & dist)
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const normal_distribution<RealType, Policy> & dist)
{
-   using std::log;
+   BOOST_MATH_STD_USING
   RealType arg = constants::two_pi<RealType>()*constants::e<RealType>()*dist.standard_deviation()*dist.standard_deviation();
   return log(arg)/2;
}
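A quick illustrative check of the entropy overload touched in the last hunk (not part of the patch; Boost.Math assumed on the include path). For a normal distribution the differential entropy is log(2*pi*e*sigma^2)/2, which is exactly what the function computes:

// Illustrative check of entropy(normal_distribution).
#include <boost/math/distributions/normal.hpp>
#include <cmath>
#include <iostream>

int main()
{
    boost::math::normal_distribution<double> n(0.0, 2.0); // mean 0, sd 2

    const double sigma = n.standard_deviation();
    const double expected =
        std::log(2.0 * 3.141592653589793 * std::exp(1.0) * sigma * sigma) / 2.0;

    std::cout << "entropy  = " << entropy(n) << '\n';
    std::cout << "expected = " << expected << '\n'; // should agree
}

The switch from `using std::log;` to BOOST_MATH_STD_USING is what lets the same body resolve to the device `log` when compiled for CUDA/SYCL.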
template - inline bool check_pareto_scale( + BOOST_MATH_GPU_ENABLED inline bool check_pareto_scale( const char* function, RealType scale, RealType* result, const Policy& pol) @@ -63,7 +62,7 @@ namespace boost } // bool check_pareto_scale template - inline bool check_pareto_shape( + BOOST_MATH_GPU_ENABLED inline bool check_pareto_shape( const char* function, RealType shape, RealType* result, const Policy& pol) @@ -92,7 +91,7 @@ namespace boost } // bool check_pareto_shape( template - inline bool check_pareto_x( + BOOST_MATH_GPU_ENABLED inline bool check_pareto_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -121,7 +120,7 @@ namespace boost } // bool check_pareto_x template - inline bool check_pareto( // distribution parameters. + BOOST_MATH_GPU_ENABLED inline bool check_pareto( // distribution parameters. const char* function, RealType scale, RealType shape, @@ -140,19 +139,19 @@ namespace boost typedef RealType value_type; typedef Policy policy_type; - pareto_distribution(RealType l_scale = 1, RealType l_shape = 1) + BOOST_MATH_GPU_ENABLED pareto_distribution(RealType l_scale = 1, RealType l_shape = 1) : m_scale(l_scale), m_shape(l_shape) { // Constructor. RealType result = 0; detail::check_pareto("boost::math::pareto_distribution<%1%>::pareto_distribution", l_scale, l_shape, &result, Policy()); } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { // AKA Xm and Wolfram b and beta return m_scale; } - RealType shape()const + BOOST_MATH_GPU_ENABLED RealType shape()const { // AKA k and Wolfram a and alpha return m_shape; } @@ -173,25 +172,25 @@ namespace boost template - inline const std::pair range(const pareto_distribution& /*dist*/) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const pareto_distribution& /*dist*/) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(static_cast(0), max_value()); // scale zero to + infinity. + return boost::math::pair(static_cast(0), max_value()); // scale zero to + infinity. } // range template - inline const std::pair support(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const pareto_distribution& dist) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. using boost::math::tools::max_value; - return std::pair(dist.scale(), max_value() ); // scale to + infinity. + return boost::math::pair(dist.scale(), max_value() ); // scale to + infinity. } // support template - inline RealType pdf(const pareto_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType pdf(const pareto_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType shape = dist.shape(); RealType result = 0; @@ -207,10 +206,10 @@ namespace boost } // pdf template - inline RealType cdf(const pareto_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const pareto_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std function pow. 
- static const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType shape = dist.shape(); RealType result = 0; @@ -230,10 +229,10 @@ namespace boost } // cdf template - inline RealType logcdf(const pareto_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType logcdf(const pareto_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::logcdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::logcdf(const pareto_distribution<%1%>&, %1%)"; RealType scale = dist.scale(); RealType shape = dist.shape(); RealType result = 0; @@ -244,7 +243,7 @@ namespace boost if (x <= scale) { // regardless of shape, cdf is zero. - return -std::numeric_limits::infinity(); + return -boost::math::numeric_limits::infinity(); } result = log1p(-pow(scale/x, shape), Policy()); @@ -252,10 +251,10 @@ namespace boost } // logcdf template - inline RealType quantile(const pareto_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const pareto_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType scale = dist.scale(); RealType shape = dist.shape(); @@ -279,10 +278,10 @@ namespace boost } // quantile template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType x = c.param; RealType scale = c.dist.scale(); @@ -301,10 +300,10 @@ namespace boost } // cdf complement template - inline RealType logcdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::logcdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::logcdf(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType x = c.param; RealType scale = c.dist.scale(); @@ -323,10 +322,10 @@ namespace boost } // logcdf complement template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std function pow. 
- static const char* function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const pareto_distribution<%1%>&, %1%)"; RealType result = 0; RealType q = c.param; RealType scale = c.dist.scale(); @@ -350,10 +349,10 @@ namespace boost } // quantile complement template - inline RealType mean(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const pareto_distribution& dist) { RealType result = 0; - static const char* function = "boost::math::mean(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::mean(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), dist.shape(), &result, Policy())) { return result; @@ -370,16 +369,16 @@ namespace boost } // mean template - inline RealType mode(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const pareto_distribution& dist) { return dist.scale(); } // mode template - inline RealType median(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType median(const pareto_distribution& dist) { RealType result = 0; - static const char* function = "boost::math::median(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::median(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), dist.shape(), &result, Policy())) { return result; @@ -389,12 +388,12 @@ namespace boost } // median template - inline RealType variance(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const pareto_distribution& dist) { RealType result = 0; RealType scale = dist.scale(); RealType shape = dist.shape(); - static const char* function = "boost::math::variance(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::variance(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, scale, shape, &result, Policy())) { return result; @@ -414,12 +413,12 @@ namespace boost } // variance template - inline RealType skewness(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const pareto_distribution& dist) { BOOST_MATH_STD_USING RealType result = 0; RealType shape = dist.shape(); - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy())) { return result; @@ -440,11 +439,11 @@ namespace boost } // skewness template - inline RealType kurtosis(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const pareto_distribution& dist) { RealType result = 0; RealType shape = dist.shape(); - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy())) { return result; @@ -464,11 +463,11 @@ namespace boost } // kurtosis template - inline RealType kurtosis_excess(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const pareto_distribution& dist) { RealType result = 0; RealType shape = dist.shape(); - static const char* function = "boost::math::pdf(const pareto_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const 
pareto_distribution<%1%>&, %1%)"; if(false == detail::check_pareto(function, dist.scale(), shape, &result, Policy())) { return result; @@ -488,9 +487,9 @@ namespace boost } // kurtosis_excess template - inline RealType entropy(const pareto_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType entropy(const pareto_distribution& dist) { - using std::log; + BOOST_MATH_STD_USING RealType xm = dist.scale(); RealType alpha = dist.shape(); return log(xm/alpha) + 1 + 1/alpha; diff --git a/include/boost/math/distributions/poisson.hpp b/include/boost/math/distributions/poisson.hpp index 570a590259..c2fad66be0 100644 --- a/include/boost/math/distributions/poisson.hpp +++ b/include/boost/math/distributions/poisson.hpp @@ -2,6 +2,7 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2007. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -36,6 +37,10 @@ #ifndef BOOST_MATH_SPECIAL_POISSON_HPP #define BOOST_MATH_SPECIAL_POISSON_HPP +#include +#include +#include +#include #include #include // for incomplete gamma. gamma_q #include // for incomplete gamma. gamma_q @@ -46,9 +51,6 @@ #include // for root finding. #include -#include -#include - namespace boost { namespace math @@ -60,7 +62,7 @@ namespace boost // checks are always performed, even if exceptions are not enabled. template - inline bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_mean(const char* function, const RealType& mean, RealType* result, const Policy& pol) { if(!(boost::math::isfinite)(mean) || (mean < 0)) { @@ -73,7 +75,7 @@ namespace boost } // bool check_mean template - inline bool check_mean_NZ(const char* function, const RealType& mean, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_mean_NZ(const char* function, const RealType& mean, RealType* result, const Policy& pol) { // mean == 0 is considered an error. if( !(boost::math::isfinite)(mean) || (mean <= 0)) { @@ -86,13 +88,13 @@ namespace boost } // bool check_mean_NZ template - inline bool check_dist(const char* function, const RealType& mean, RealType* result, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline bool check_dist(const char* function, const RealType& mean, RealType* result, const Policy& pol) { // Only one check, so this is redundant really but should be optimized away. 
         return check_mean_NZ(function, mean, result, pol);
       } // bool check_dist

       template <class RealType, class Policy>
-      inline bool check_k(const char* function, const RealType& k, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_k(const char* function, const RealType& k, RealType* result, const Policy& pol)
       {
         if((k < 0) || !(boost::math::isfinite)(k))
         {
@@ -105,7 +107,7 @@ namespace boost
       } // bool check_k

       template <class RealType, class Policy>
-      inline bool check_dist_and_k(const char* function, RealType mean, RealType k, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist_and_k(const char* function, RealType mean, RealType k, RealType* result, const Policy& pol)
       {
         if((check_dist(function, mean, result, pol) == false) ||
           (check_k(function, k, result, pol) == false))
@@ -116,7 +118,7 @@ namespace boost
       } // bool check_dist_and_k

       template <class RealType, class Policy>
-      inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_prob(const char* function, const RealType& p, RealType* result, const Policy& pol)
       { // Check 0 <= p <= 1
         if(!(boost::math::isfinite)(p) || (p < 0) || (p > 1))
         {
@@ -129,7 +131,7 @@ namespace boost
       } // bool check_prob

       template <class RealType, class Policy>
-      inline bool check_dist_and_prob(const char* function, RealType mean, RealType p, RealType* result, const Policy& pol)
+      BOOST_MATH_GPU_ENABLED inline bool check_dist_and_prob(const char* function, RealType mean, RealType p, RealType* result, const Policy& pol)
       {
         if((check_dist(function, mean, result, pol) == false) ||
           (check_prob(function, p, result, pol) == false))
@@ -148,7 +150,7 @@
       using value_type = RealType;
       using policy_type = Policy;

-      explicit poisson_distribution(RealType l_mean = 1) : m_l(l_mean) // mean (lambda).
+      BOOST_MATH_GPU_ENABLED explicit poisson_distribution(RealType l_mean = 1) : m_l(l_mean) // mean (lambda).
      { // Expected mean number of events that occur during the given interval.
        RealType r;
        poisson_detail::check_dist(
@@ -157,7 +159,7 @@ namespace boost
          &r, Policy());
      } // poisson_distribution constructor.

-      RealType mean() const
+      BOOST_MATH_GPU_ENABLED RealType mean() const
      { // Private data getter function.
        return m_l;
      }
@@ -176,28 +178,28 @@ namespace boost
  // Non-member functions to give properties of the distribution.

  template <class RealType, class Policy>
-  inline std::pair<RealType, RealType> range(const poisson_distribution<RealType, Policy>& /* dist */)
+  BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const poisson_distribution<RealType, Policy>& /* dist */)
  { // Range of permissible values for random variable k.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // Max integer?
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>()); // Max integer?
  }

  template <class RealType, class Policy>
-  inline std::pair<RealType, RealType> support(const poisson_distribution<RealType, Policy>& /* dist */)
+  BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const poisson_distribution<RealType, Policy>& /* dist */)
  { // Range of supported values for random variable k.
    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
    using boost::math::tools::max_value;
-    return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+    return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
  }

  template <class RealType, class Policy>
-  inline RealType mean(const poisson_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType mean(const poisson_distribution<RealType, Policy>& dist)
  { // Mean of poisson distribution = lambda.
    return dist.mean();
  } // mean

  template <class RealType, class Policy>
-  inline RealType mode(const poisson_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType mode(const poisson_distribution<RealType, Policy>& dist)
  { // mode.
    BOOST_MATH_STD_USING // ADL of std functions.
    return floor(dist.mean());
@@ -206,7 +208,7 @@ namespace boost
  // Median now implemented via quantile(half) in derived accessors.

  template <class RealType, class Policy>
-  inline RealType variance(const poisson_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType variance(const poisson_distribution<RealType, Policy>& dist)
  { // variance.
    return dist.mean();
  }
@@ -214,14 +216,14 @@ namespace boost
  // standard_deviation provided by derived accessors.

  template <class RealType, class Policy>
-  inline RealType skewness(const poisson_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType skewness(const poisson_distribution<RealType, Policy>& dist)
  { // skewness = sqrt(l).
    BOOST_MATH_STD_USING // ADL of std functions.
    return 1 / sqrt(dist.mean());
  }

  template <class RealType, class Policy>
-  inline RealType kurtosis_excess(const poisson_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const poisson_distribution<RealType, Policy>& dist)
  { // skewness = sqrt(l).
    return 1 / dist.mean(); // kurtosis_excess 1/mean from Wiki & MathWorld eq 31.
    // http://mathworld.wolfram.com/Kurtosis.html explains that the kurtosis excess
@@ -230,7 +232,7 @@ namespace boost
  } // RealType kurtosis_excess

  template <class RealType, class Policy>
-  inline RealType kurtosis(const poisson_distribution<RealType, Policy>& dist)
+  BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const poisson_distribution<RealType, Policy>& dist)
  { // kurtosis is 4th moment about the mean = u4 / sd ^ 4
    // http://en.wikipedia.org/wiki/Kurtosis
    // kurtosis can range from -2 (flat top) to +infinity (sharp peak & heavy tails).
@@ -242,7 +244,7 @@ namespace boost
  } // RealType kurtosis

  template <class RealType, class Policy>
-  RealType pdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
+  BOOST_MATH_GPU_ENABLED RealType pdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
  { // Probability Density/Mass Function.
    // Probability that there are EXACTLY k occurrences (or arrivals).
    BOOST_FPU_EXCEPTION_GUARD
@@ -274,7 +276,7 @@ namespace boost
  } // pdf

  template <class RealType, class Policy>
-  RealType logpdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
+  BOOST_MATH_GPU_ENABLED RealType logpdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
  {
    BOOST_FPU_EXCEPTION_GUARD
@@ -283,7 +285,7 @@ namespace boost
    RealType mean = dist.mean();
    // Error check:
-    RealType result = -std::numeric_limits<RealType>::infinity();
+    RealType result = -boost::math::numeric_limits<RealType>::infinity();
    if(false == poisson_detail::check_dist_and_k(
      "boost::math::pdf(const poisson_distribution<%1%>&, %1%)",
      mean,
@@ -296,7 +298,7 @@ namespace boost
    // Special case of mean zero, regardless of the number of events k.
    if (mean == 0)
    { // Probability for any k is zero.
-      return std::numeric_limits<RealType>::quiet_NaN();
+      return boost::math::numeric_limits<RealType>::quiet_NaN();
    }

    // Special case where k and lambda are both positive
@@ -310,7 +312,7 @@ namespace boost
  }

  template <class RealType, class Policy>
-  RealType cdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
+  BOOST_MATH_GPU_ENABLED RealType cdf(const poisson_distribution<RealType, Policy>& dist, const RealType& k)
  { // Cumulative Distribution Function Poisson.
    // The random variate k is the number of occurrences(or arrivals)
    // k argument may be integral, signed, or unsigned, or floating point.
@@ -361,7 +363,7 @@ namespace boost
  } // binomial cdf

  template <class RealType, class Policy>
-  RealType cdf(const complemented2_type<poisson_distribution<RealType, Policy>, RealType>& c)
+  BOOST_MATH_GPU_ENABLED RealType cdf(const complemented2_type<poisson_distribution<RealType, Policy>, RealType>& c)
  { // Complemented Cumulative Distribution Function Poisson
    // The random variate k is the number of events, occurrences or arrivals.
    // k argument may be integral, signed, or unsigned, or floating point.
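// The complemented form computes P(X > k) directly rather than 1 - cdf(k),
// preserving accuracy in the upper tail: since cdf(k) = gamma_q(k+1, mean),
// the complement is gamma_p(k+1, mean). A sketch using only the public API
// (illustrative, not part of this patch):
//
//   boost::math::poisson_distribution<double> d(4.0);
//   double q = cdf(complement(d, 2.0)); // P(X > 2) = gamma_p(3, 4.0)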
@@ -411,10 +413,10 @@ namespace boost
  } // poisson ccdf

  template <class RealType, class Policy>
-  inline RealType quantile(const poisson_distribution<RealType, Policy>& dist, const RealType& p)
+  BOOST_MATH_GPU_ENABLED inline RealType quantile(const poisson_distribution<RealType, Policy>& dist, const RealType& p)
  { // Quantile (or Percent Point) Poisson function.
    // Return the number of expected events k for a given probability p.
-    static const char* function = "boost::math::quantile(const poisson_distribution<%1%>&, %1%)";
+    constexpr auto function = "boost::math::quantile(const poisson_distribution<%1%>&, %1%)";
    RealType result = 0; // of Argument checks:
    if(false == poisson_detail::check_prob(
      function,
@@ -443,7 +445,7 @@ namespace boost
      return policies::raise_overflow_error<RealType>(function, 0, Policy());
    }
    using discrete_type = typename Policy::discrete_quantile_type;
-    std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+    boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
    RealType guess;
    RealType factor = 8;
    RealType z = dist.mean();
@@ -477,13 +479,13 @@ namespace boost
  } // quantile

  template <class RealType, class Policy>
-  inline RealType quantile(const complemented2_type<poisson_distribution<RealType, Policy>, RealType>& c)
+  BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<poisson_distribution<RealType, Policy>, RealType>& c)
  { // Quantile (or Percent Point) of Poisson function.
    // Return the number of expected events k for a given
    // complement of the probability q.
    //
    // Error checks:
-    static const char* function = "boost::math::quantile(complement(const poisson_distribution<%1%>&, %1%))";
+    constexpr auto function = "boost::math::quantile(complement(const poisson_distribution<%1%>&, %1%))";
    RealType q = c.param;
    const poisson_distribution<RealType, Policy>& dist = c.dist;
    RealType result = 0;  // of argument checks.
@@ -514,7 +516,7 @@ namespace boost
      return 0; // Exact result regardless of discrete-quantile Policy
    }
    using discrete_type = typename Policy::discrete_quantile_type;
-    std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+    boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
    RealType guess;
    RealType factor = 8;
    RealType z = dist.mean();
diff --git a/include/boost/math/distributions/rayleigh.hpp b/include/boost/math/distributions/rayleigh.hpp
index 4e741313c8..155525b539 100644
--- a/include/boost/math/distributions/rayleigh.hpp
+++ b/include/boost/math/distributions/rayleigh.hpp
@@ -7,6 +7,10 @@
 #ifndef BOOST_STATS_rayleigh_HPP
 #define BOOST_STATS_rayleigh_HPP

+#include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -19,16 +23,12 @@
 #  pragma warning(disable: 4702) // unreachable code (return after domain_error throw).
 #endif

-#include
-#include
-#include
-
 namespace boost{ namespace math{

 namespace detail
 { // Error checks:
   template <class RealType, class Policy>
-  inline bool verify_sigma(const char* function, RealType sigma, RealType* presult, const Policy& pol)
+  BOOST_MATH_GPU_ENABLED inline bool verify_sigma(const char* function, RealType sigma, RealType* presult, const Policy& pol)
   {
      if((sigma <= 0) || (!(boost::math::isfinite)(sigma)))
      {
@@ -41,7 +41,7 @@ namespace detail
   } // bool verify_sigma

   template <class RealType, class Policy>
-  inline bool verify_rayleigh_x(const char* function, RealType x, RealType* presult, const Policy& pol)
+  BOOST_MATH_GPU_ENABLED inline bool verify_rayleigh_x(const char* function, RealType x, RealType* presult, const Policy& pol)
   {
      if((x < 0) || (boost::math::isnan)(x))
      {
@@ -61,14 +61,14 @@ class rayleigh_distribution
   using value_type = RealType;
   using policy_type = Policy;

-   explicit rayleigh_distribution(RealType l_sigma = 1)
+   BOOST_MATH_GPU_ENABLED explicit rayleigh_distribution(RealType l_sigma = 1)
      : m_sigma(l_sigma)
   {
      RealType err;
      detail::verify_sigma("boost::math::rayleigh_distribution<%1%>::rayleigh_distribution", l_sigma, &err, Policy());
   } // rayleigh_distribution

-   RealType sigma()const
+   BOOST_MATH_GPU_ENABLED RealType sigma()const
   { // Accessor.
      return m_sigma;
   }
@@ -85,28 +85,28 @@ rayleigh_distribution(RealType)->rayleigh_distribution

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> range(const rayleigh_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> range(const rayleigh_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(static_cast<RealType>(0), std::numeric_limits<RealType>::has_infinity ? std::numeric_limits<RealType>::infinity() : max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), boost::math::numeric_limits<RealType>::has_infinity ? boost::math::numeric_limits<RealType>::infinity() : max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline std::pair<RealType, RealType> support(const rayleigh_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<RealType, RealType> support(const rayleigh_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
   // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
    using boost::math::tools::max_value;
-   return std::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(static_cast<RealType>(0), max_value<RealType>());
 }

 template <class RealType, class Policy>
-inline RealType pdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std function exp.

    RealType sigma = dist.sigma();
    RealType result = 0;
-   static const char* function = "boost::math::pdf(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::pdf(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
       return result;
@@ -125,13 +125,13 @@ inline RealType pdf(const rayleigh_distribution& dist, const R
 } // pdf

 template <class RealType, class Policy>
-inline RealType logpdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logpdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std function exp.
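// Mathematically logpdf(x) = log(x) - 2*log(sigma) - x*x / (2*sigma*sigma);
// evaluating it directly (rather than as log(pdf(x))) keeps a finite result
// for large x once exp(-x*x/(2*sigma*sigma)) would flush to zero.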
    const RealType sigma = dist.sigma();
-   RealType result = -std::numeric_limits<RealType>::infinity();
-   static const char* function = "boost::math::logpdf(const rayleigh_distribution<%1%>&, %1%)";
+   RealType result = -boost::math::numeric_limits<RealType>::infinity();
+   constexpr auto function = "boost::math::logpdf(const rayleigh_distribution<%1%>&, %1%)";

    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
@@ -151,13 +151,13 @@ inline RealType logpdf(const rayleigh_distribution& dist, cons
 } // logpdf

 template <class RealType, class Policy>
-inline RealType cdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

    RealType result = 0;
    RealType sigma = dist.sigma();
-   static const char* function = "boost::math::cdf(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
       return result;
@@ -171,33 +171,33 @@ inline RealType cdf(const rayleigh_distribution& dist, const R
 } // cdf

 template <class RealType, class Policy>
-inline RealType logcdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType logcdf(const rayleigh_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

    RealType result = 0;
    RealType sigma = dist.sigma();
-   static const char* function = "boost::math::logcdf(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logcdf(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
-      return -std::numeric_limits<RealType>::infinity();
+      return -boost::math::numeric_limits<RealType>::infinity();
    }
    if(false == detail::verify_rayleigh_x(function, x, &result, Policy()))
    {
-      return -std::numeric_limits<RealType>::infinity();
+      return -boost::math::numeric_limits<RealType>::infinity();
    }

    result = log1p(-exp(-x * x / ( 2 * sigma * sigma)), Policy());
    return result;
 } // logcdf

 template <class RealType, class Policy>
-inline RealType quantile(const rayleigh_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const rayleigh_distribution<RealType, Policy>& dist, const RealType& p)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

    RealType result = 0;
    RealType sigma = dist.sigma();
-   static const char* function = "boost::math::quantile(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
       return result;
    if(false == detail::check_probability(function, p, &result, Policy()))
@@ -216,13 +216,13 @@ inline RealType quantile(const rayleigh_distribution& dist, co
 } // quantile

 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<rayleigh_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<rayleigh_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

    RealType result = 0;
    RealType sigma = c.dist.sigma();
-   static const char* function = "boost::math::cdf(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::cdf(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
       return result;
@@ -241,21 +241,21 @@ inline RealType cdf(const complemented2_type

 template <class RealType, class Policy>
-inline RealType logcdf(const complemented2_type<rayleigh_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type<rayleigh_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions

    RealType result = 0;
    RealType sigma = c.dist.sigma();
-   static const char* function = "boost::math::logcdf(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::logcdf(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
-      return -std::numeric_limits<RealType>::infinity();
+      return -boost::math::numeric_limits<RealType>::infinity();
    }
    RealType x = c.param;
    if(false == detail::verify_rayleigh_x(function, x, &result, Policy()))
    {
-      return -std::numeric_limits<RealType>::infinity();
+      return -boost::math::numeric_limits<RealType>::infinity();
    }
    RealType ea = x * x / (2 * sigma * sigma);
    // Fix for VC11/12 x64 bug in exp(float):
@@ -266,13 +266,13 @@ inline RealType logcdf(const complemented2_type

 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<rayleigh_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<rayleigh_distribution<RealType, Policy>, RealType>& c)
 {
    BOOST_MATH_STD_USING // for ADL of std functions, log & sqrt.

    RealType result = 0;
    RealType sigma = c.dist.sigma();
-   static const char* function = "boost::math::quantile(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
       return result;
@@ -295,11 +295,11 @@ inline RealType quantile(const complemented2_type

 template <class RealType, class Policy>
-inline RealType mean(const rayleigh_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const rayleigh_distribution<RealType, Policy>& dist)
 {
    RealType result = 0;
    RealType sigma = dist.sigma();
-   static const char* function = "boost::math::mean(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::mean(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
       return result;
@@ -309,11 +309,11 @@ inline RealType mean(const rayleigh_distribution& dist)
 } // mean

 template <class RealType, class Policy>
-inline RealType variance(const rayleigh_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const rayleigh_distribution<RealType, Policy>& dist)
 {
    RealType result = 0;
    RealType sigma = dist.sigma();
-   static const char* function = "boost::math::variance(const rayleigh_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::variance(const rayleigh_distribution<%1%>&, %1%)";
    if(false == detail::verify_sigma(function, sigma, &result, Policy()))
    {
       return result;
@@ -323,20 +323,20 @@ inline RealType variance(const rayleigh_distribution& dist)
 } // variance

 template <class RealType, class Policy>
-inline RealType mode(const rayleigh_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const rayleigh_distribution<RealType, Policy>& dist)
 {
    return dist.sigma();
 }

 template <class RealType, class Policy>
-inline RealType median(const rayleigh_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType median(const rayleigh_distribution<RealType, Policy>& dist)
 {
    using boost::math::constants::root_ln_four;
    return root_ln_four<RealType>() * dist.sigma();
 }

 template <class RealType, class Policy>
-inline RealType skewness(const rayleigh_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const rayleigh_distribution<RealType, Policy>& /*dist*/)
 {
    return static_cast<RealType>(0.63111065781893713819189935154422777984404221106391L);
    // Computed using NTL at 150 bit, about 50 decimal digits.
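For the Rayleigh distribution the complemented logcdf above reduces to exactly
-x*x / (2*sigma*sigma). A minimal host-side check, assuming a Boost.Math recent
enough to provide the logcdf free function (illustrative, not part of this patch):

   #include <boost/math/distributions/rayleigh.hpp>
   #include <cassert>
   #include <cmath>

   int main()
   {
      boost::math::rayleigh_distribution<double> r(2.0);
      double x = 3.0;
      double lsf = logcdf(complement(r, x)); // log of the survival function
      assert(std::abs(lsf - (-x * x / 8.0)) < 1e-12);
      return 0;
   }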
@@ -344,7 +344,7 @@ inline RealType skewness(const rayleigh_distribution& /*dist*/
 }

 template <class RealType, class Policy>
-inline RealType kurtosis(const rayleigh_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const rayleigh_distribution<RealType, Policy>& /*dist*/)
 {
    return static_cast<RealType>(3.2450893006876380628486604106197544154170667057995L);
    // Computed using NTL at 150 bit, about 50 decimal digits.
@@ -352,7 +352,7 @@ inline RealType kurtosis(const rayleigh_distribution& /*dist*/
 }

 template <class RealType, class Policy>
-inline RealType kurtosis_excess(const rayleigh_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const rayleigh_distribution<RealType, Policy>& /*dist*/)
 {
    return static_cast<RealType>(0.2450893006876380628486604106197544154170667057995L);
    // Computed using NTL at 150 bit, about 50 decimal digits.
@@ -360,9 +360,9 @@ inline RealType kurtosis_excess(const rayleigh_distribution& /
 } // kurtosis_excess

 template <class RealType, class Policy>
-inline RealType entropy(const rayleigh_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const rayleigh_distribution<RealType, Policy>& dist)
 {
-   using std::log;
+   BOOST_MATH_STD_USING
    return 1 + log(dist.sigma()*constants::one_div_root_two<RealType>()) + constants::euler<RealType>()/2;
 }

diff --git a/include/boost/math/distributions/saspoint5.hpp b/include/boost/math/distributions/saspoint5.hpp
new file mode 100644
index 0000000000..7846b99560
--- /dev/null
+++ b/include/boost/math/distributions/saspoint5.hpp
@@ -0,0 +1,2796 @@
+// Copyright Takuma Yoshimura 2024.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_STATS_SASPOINT5_HPP
+#define BOOST_STATS_SASPOINT5_HPP
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4127) // conditional expression is constant
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifndef BOOST_MATH_HAS_NVRTC
+#include
+#include
+#endif
+
+namespace boost { namespace math {
+
+template <typename RealType, typename Policy>
+class saspoint5_distribution;
+
+namespace detail {
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 0.125) {
+        // Rational Approximation
+        // Maximum Relative Error: 7.8747e-17
+        BOOST_MATH_STATIC const RealType P[13] = {
+            static_cast<RealType>(6.36619772367581343076e-1),
+            static_cast<RealType>(2.17275699713513462507e2),
+            static_cast<RealType>(3.49063163361344578910e4),
+            static_cast<RealType>(3.40332906932698464252e6),
+            static_cast<RealType>(2.19485577044357440949e8),
+            static_cast<RealType>(9.66086435948730562464e9),
+            static_cast<RealType>(2.90571833690383003932e11),
+            static_cast<RealType>(5.83089315593106044683e12),
+            static_cast<RealType>(7.37911022713775715766e13),
+            static_cast<RealType>(5.26757196603002476852e14),
+            static_cast<RealType>(1.75780353683063527570e15),
+            static_cast<RealType>(1.85883041942144306222e15),
+            static_cast<RealType>(4.19828222275972713819e14),
+        };
+        BOOST_MATH_STATIC const RealType Q[15] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.41295871011779138155e2),
+            static_cast<RealType>(5.48907134827349102297e4),
+            static_cast<RealType>(5.36641455324410261980e6),
+            static_cast<RealType>(3.48045461004960397915e8),
+            static_cast<RealType>(1.54920747349701741537e10),
+            static_cast<RealType>(4.76490595358644532404e11),
+            static_cast<RealType>(1.00104823128402735005e13),
+            static_cast<RealType>(1.39703522470411802507e14),
+            static_cast<RealType>(1.23724881334160220266e15),
+            static_cast<RealType>(6.47437580921138359461e15),
+            static_cast<RealType>(1.77627318260037604066e16),
+
static_cast(2.04792815832538146160e16), + static_cast(7.45102534638640681964e15), + static_cast(3.68496090049571174527e14), + }; + + result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x); + } + else if (x < 0.25) { + RealType t = x - static_cast (0.125); + + // Rational Approximation + // Maximum Relative Error: 2.1471e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(4.35668401768623200524e-1), + static_cast(7.12477357389655327116e0), + static_cast(4.02466317948738993787e1), + static_cast(9.04888497628205955839e1), + static_cast(7.56175387288619211460e1), + static_cast(1.26950253999694502457e1), + static_cast(-6.59304802132933325219e-1), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.98623818041545101115e1), + static_cast(1.52856383017632616759e2), + static_cast(5.70706902111659740041e2), + static_cast(1.06454927680197927878e3), + static_cast(9.13160352749764887791e2), + static_cast(2.58872466837209126618e2), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 0.5) { + RealType t = x - static_cast (0.25); + + // Rational Approximation + // Maximum Relative Error: 5.3265e-17 + BOOST_MATH_STATIC const RealType P[8] = { + static_cast(2.95645445681747568732e-1), + static_cast(2.23779537590791610124e0), + static_cast(5.01302198171248036052e0), + static_cast(2.76363131116340641935e0), + static_cast(1.18134858311074670327e-1), + static_cast(2.00287083462139382715e-2), + static_cast(-7.53979800555375661516e-3), + static_cast(1.37294648777729527395e-3), + }; + BOOST_MATH_STATIC const RealType Q[6] = { + static_cast(1.), + static_cast(1.02879626214781666701e1), + static_cast(3.85125274509784615691e1), + static_cast(6.18474367367800231625e1), + static_cast(3.77100050087302476029e1), + static_cast(5.41866360740066443656e0), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 1) { + RealType t = x - static_cast (0.5); + + // Rational Approximation + // Maximum Relative Error: 2.7947e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(1.70762401725206223811e-1), + static_cast(8.43343631021918972436e-1), + static_cast(1.39703819152564365627e0), + static_cast(8.75843324574692085009e-1), + static_cast(1.86199552443747562584e-1), + static_cast(7.35858280181579907616e-3), + static_cast(-1.03693607694266081126e-4), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(6.73363440952557318819e0), + static_cast(1.74288966619209299976e1), + static_cast(2.15943268035083671893e1), + static_cast(1.29818726981381859879e1), + static_cast(3.40707211426946022041e0), + static_cast(2.80229012541729457678e-1), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 2) { + RealType t = x - 1; + + // Rational Approximation + // Maximum Relative Error: 1.7051e-18 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(8.61071469126041183247e-2), + static_cast(1.69689585946245345838e-1), + static_cast(1.09494833291892212033e-1), + static_cast(2.76619622453130604637e-2), + static_cast(2.44972748006913061509e-3), + static_cast(4.09853605772288438003e-5), + static_cast(-2.63561415158954865283e-7), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(3.04082856018856244947e0), + static_cast(3.52558663323956252986e0), + static_cast(1.94795523079701426332e0), + static_cast(5.23956733400745421623e-1), + 
static_cast(6.19453597593998871667e-2), + static_cast(2.31061984192347753499e-3), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 4) { + RealType t = x - 2; + + // Rational Approximation + // Maximum Relative Error: 2.9247e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(3.91428580496513429479e-2), + static_cast(4.07162484034780126757e-2), + static_cast(1.43342733342753081931e-2), + static_cast(2.01622178115394696215e-3), + static_cast(1.00648013467757737201e-4), + static_cast(9.51545046750892356441e-7), + static_cast(-3.56598940936439037087e-9), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.63904431617187026619e0), + static_cast(1.03812003196677309121e0), + static_cast(3.18144310790210668797e-1), + static_cast(4.81930155615666517263e-2), + static_cast(3.25435391589941361778e-3), + static_cast(7.01626957128181647457e-5), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 8) { + RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 2.6547e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(1.65057384221262866484e-2), + static_cast(8.05429762031495873704e-3), + static_cast(1.35249234647852784985e-3), + static_cast(9.18685252682786794440e-5), + static_cast(2.23447790937806602674e-6), + static_cast(1.03176916111395079569e-8), + static_cast(-1.94913182592441292094e-11), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(8.10113554189626079232e-1), + static_cast(2.54175325409968367580e-1), + static_cast(3.87119072807894983910e-2), + static_cast(2.92520770162792443587e-3), + static_cast(9.89094130526684467420e-5), + static_cast(1.07148513311070719488e-6), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 2.5484e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(6.60044810497290557553e-3), + static_cast(1.59342644994950292031e-3), + static_cast(1.32429706922966110874e-4), + static_cast(4.45378136978435909660e-6), + static_cast(5.36409958111394628239e-8), + static_cast(1.22293787679910067873e-10), + static_cast(-1.16300443044165216564e-13), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(4.10446485803039594111e-1), + static_cast(6.51887342399859289520e-2), + static_cast(5.02151225308643905366e-3), + static_cast(1.91741179639551137839e-4), + static_cast(3.27316600311598190022e-6), + static_cast(1.78840301213102212857e-8), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 2.9866e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(2.54339461777955741686e-3), + static_cast(3.10069525357852579756e-4), + static_cast(1.30082682796085732756e-5), + static_cast(2.20715868479255585050e-7), + static_cast(1.33996659756026452288e-9), + static_cast(1.53505360463827994365e-12), + static_cast(-7.42649416356965421308e-16), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(2.09203384450859785642e-1), + static_cast(1.69422626897631306130e-2), + static_cast(6.65649059670689720386e-4), + static_cast(1.29654785666009849481e-5), + static_cast(1.12886139474560969619e-7), + static_cast(3.14420104899170413840e-10), + }; + + result = 
tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 3.3581e-17 + BOOST_MATH_STATIC const RealType P[7] = { + static_cast(9.55085695067883584460e-4), + static_cast(5.86125496733202756668e-5), + static_cast(1.23753971325810931282e-6), + static_cast(1.05643819745933041408e-8), + static_cast(3.22502949410095015524e-11), + static_cast(1.85366144680157942079e-14), + static_cast(-4.53975807317403152058e-18), + }; + BOOST_MATH_STATIC const RealType Q[7] = { + static_cast(1.), + static_cast(1.05980850386474826374e-1), + static_cast(4.34966042652000070674e-3), + static_cast(8.66341538387446465700e-5), + static_cast(8.55608082202236124363e-7), + static_cast(3.77719968378509293354e-9), + static_cast(5.33287361559571716670e-12), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + RealType t = 1 / sqrt(x); + + // Rational Approximation + // Maximum Relative Error: 4.7450e-19 + BOOST_MATH_STATIC const RealType P[5] = { + static_cast(1.99471140200716338970e-1), + static_cast(-1.93310094131437487158e-2), + static_cast(-8.44282614309073196195e-3), + static_cast(3.47296024282356038069e-3), + static_cast(-4.05398011689821941383e-4), + }; + BOOST_MATH_STATIC const RealType Q[5] = { + static_cast(1.), + static_cast(7.00973251258577238892e-1), + static_cast(2.66969681258835723157e-1), + static_cast(5.51785147503612200456e-2), + static_cast(6.50130030979966274341e-3), + }; + + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t / x; + } + + return result; +} + + +template +BOOST_MATH_GPU_ENABLED inline RealType saspoint5_pdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant&) +{ + BOOST_MATH_STD_USING + RealType result; + + if (x < 0.0625) { + // Rational Approximation + // Maximum Relative Error: 8.8841e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[27] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.36619772367581343075535053490057448138e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.57459506929453385798277946154823008327e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46717322844023441698710451505816706570e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71501459971530549476153273173061194095e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.76700973495278431084530045707075552432e10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01328150775099946510145440412520620021e13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70028222513668830210058353057559790101e15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29641781943744384078006991488193839955e17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52611994112742436432957758588495082163e19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27833177267552931459542318826727288124e21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68946162731840551853993619351896931533e23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02965010233956763504899745874128908220e25), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.14128569264874914146628076133997950655e26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09103580386900060922163883603492216942e28), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.86778299087452621293332172137014749128e29), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.80029712249744334924217328667885673985e31), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70890080432228368476255091774238573277e32), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
5.88600513999992354909078399482884993261e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.01189178534848836605739139176681647755e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.06531475170803043941021113424602440078e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.64956999370443524098457423629252855270e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44276098283517934229787916584447559248e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.45856704224433991524661028965741649584e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.47263237190968408624388275549716907309e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.66186300951901408251743228798832386260e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.48064966533519934186356663849904556319e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64877082086372991309408001661535573441e35), + }; + BOOST_MATH_STATIC const RealType Q[28] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18981461118065892086304195732751798634e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.01761929839041982958990681130944341399e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69465252239913021973760046507387620537e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.49221044103838155300076098325950584061e10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59327386289821190042576978177896481082e13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67528179803224728786405503232064643870e15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61672367849271591791062829736720884633e17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98395893917909208201801908435620016552e19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60025693881358827551113845076726845495e21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67730578745705562356709169493821118109e23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63843883526710042156562706339553092312e25), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.23075214698024188140971761421762265880e26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.37775321923937393366376907114580842429e28), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12444724625354796650300159037364355605e30), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00860835602766063447009568106012449767e31), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.39614080159468893509273006948526469708e32), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06547095715472468415058181351212520255e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36755997709303811764051969789337337957e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32519530489892818585066019217287415587e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.45230390606834183602522256278256501404e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.80344475131699029428900627020022801971e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66469314795307459840482483320814279444e38), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70209065673736156218117594311801487932e38), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.84490531246108754748100009460860427732e38), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30215083398643966091721732133851539475e38), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.58032845332990262754766784625271262271e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.50461648438613634025964361513066059697e36), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x); + } + else if (x < 0.125) { + RealType t = x - static_cast (0.0625); + + // Rational Approximation + // Maximum Relative Error: 3.4585e-35 + // LCOV_EXCL_START + 
BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.46416716200748206779925127900698754119e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.41771273526123373239570033672829787791e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56142610225585235535211648703534340871e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15655694129872563686497490176725921724e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.00791883661952751945853742455643714995e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30252591667828615354689186280704562254e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76168115448224677276551213052798322583e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.88534624532179841393387625270218172719e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14740447137831585842166880265350244623e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14904082614021239315925958812100948136e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.76866867279164114004579652405104553404e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53475339598769347326916978463911377965e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.88896160275915786487519266368539625326e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05543800791717482823610940401201712196e4), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.44407579416524903840331499438398472639e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15911780811299460009161345260146251462e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.88457596285725454686358792906273558406e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.66501639812506059997744549411633476528e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.12674134216028769532305433586266118000e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.87676063477990584593444083577765264392e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.56084282739608760299329382263598821653e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.34250986378665047914811630036201995871e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31288233106689286803200674021353188597e9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33621494302241474082474689597125896975e9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.63379428046258653791600947328520263412e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14558538557562267533922961110917101850e8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 0.25) { + RealType t = x - static_cast (0.125); + + // Rational Approximation + // Maximum Relative Error: 6.9278e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35668401768623200524372663239480799018e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30066509937988171489091367354416214000e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.05924026744937322690717755156090122074e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.12998524955326375684693500551926325112e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.52237930808361186011042950178715609183e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.10734809597587633852077152938985998879e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.20796157836149826988172603622242119074e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12398478061053302537736799402801934778e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17841330491647012385157454335820786724e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.46281413765362795389526259057436151953e5), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52220357379402116641048490644093497829e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.51130316105543847380510577656570543736e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.32201781975497810173532067354797097401e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.96874547436310030183519174847668703774e0), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.63164311578114868477819520857286165076e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34964379844144961683927306966955217328e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82966031793809959278519002412667883288e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56215285850856046267451500310816276675e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.81046679663412610005501878092824281161e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.33868038251479411246071640628518434659e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.46262495881941625571640264458627940579e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.40052628730443097561652737049917920495e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44394803828297754346261138417756941544e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.56647617803506258343236509255155360957e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.53513095899009948733175317927025056561e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69130675750530663088963759279778748696e5), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 0.5) { + RealType t = x - static_cast (0.25); + + // Rational Approximation + // Maximum Relative Error: 6.9378e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95645445681747568731488283573032414811e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.83246437763964151893665752064650172391e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.85333417559435252576820440080930004674e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.90974714199542064991001365628659054084e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39707205668285805800884524044738261436e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.24814598419826565698241508792385416075e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.95012897118808793886195172068123345314e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.87265743900139300849404272909665705025e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.98795164648056126707212245325405968413e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07012128790318535418330629467906917213e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99797198893523173981812955075412130913e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55029227544167913873724286459253168886e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54064889901609722583601330171719819660e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.72254289950537680833853394958874977464e-3), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.58291085070053442257438623486099473087e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95618461039379226195473938654286975682e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97427161745150579714266897556974326502e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.52730436681412535198281529590508861106e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49521185356761585062135933350225236726e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
5.03881178612341724262911142022761966061e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.02360046338629039644581819847209730553e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.65580339066083507998465454599272345735e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.15462499626138125314518636645472893045e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61951767959774678843021179589300545717e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.60745557054877240279811529503888551492e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.91061555870569579915258835459255406575e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43045229010040855016672246098687100063e1), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 1) { + RealType t = x - static_cast (0.5); + + // Rational Approximation + // Maximum Relative Error: 6.4363e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70762401725206223811383500786268939645e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19353011456197635663058525904929358535e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22974648900600015961253465796487372402e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.91951696059042324975935209295355569292e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.79039444119906169910281912009369164227e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00089963120992100860902142265631127046e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.37108883429306700857182028809960789020e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.49586566873564432788366931251358248417e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49790521605774884174840168128255220471e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.90660338063979435668763608259382712726e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93409982383888149064797608605579930804e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22802459215932860445033185874876812040e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.07739227340181463034286653569468171767e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.02669738424010290973023004028523684766e-7), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46404480283267324138113869370306506431e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.54550184643308468933661600211579108422e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.63602410602063476726031476852965502123e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.94463479638213888403144706176973026333e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48607087483870766806529883069123352339e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69715692924508994524755312953665710218e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33237849965272853370191827043868842100e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.08460086451666825383009487734769646087e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.47365552394788536087148438788608689300e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.38010282703940184371247559455167674975e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.67219842525655806370702248122668214685e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.01852843874982199859775136086676841910e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.14767043526088185802569803397824432028e-3), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 2) { + RealType t = x - 1; + + // Rational 
Approximation + // Maximum Relative Error: 9.1244e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.61071469126041183247373313827161939454e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.35837460186564880289965856498718321896e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.47783071967681246738651796742079530382e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16019502727107539284403003943433359877e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.80510046274709592896987229782879937271e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30456542768955299533391113704078540955e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36539167913428133313942008990965988621e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76450743657913389896743235938695682829e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42847090205575096649865021874905747106e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41380341540026027117735179862124402398e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.40549721587212773424211923602910622515e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09089653391032945883918434200567278139e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21403900721572475664926557233205232491e-10), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.13172035933794917563324458011617112124e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65687100738157412154132860910003018338e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59672433683883998168388916533196510994e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.61469557815097583209668778301921207455e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.77070955301136405523492329700943077340e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.20825570431301943907348077675777546304e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.60136197167727810483751794121979805142e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.53723076053642006159503073104152703814e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.63397465217490984394478518334313362490e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.40577918603319523990542237990107206371e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.94376458316662573143947719026985667328e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.09333568224541559157192543410988474886e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.59947287428695057506683902409023760438e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 4) { + RealType t = x - 2; + + // Rational Approximation + // Maximum Relative Error: 8.1110e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.91428580496513429479068747515164587814e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.69015019070193436467106672180804948494e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03147451266231819912643754579290008651e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.18825170881552297150779588545792258740e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30548850262278582401286533053286406505e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.54315108501815531776138839512564427279e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.66434584176931077662201101557716482514e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.66158632576958238392567355014249971287e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.31365802206301246598393821671437863818e-6), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85378389166807263837732376845556856416e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20375363151456683883984823721339648679e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06637401794693307359898089790558771957e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08663671047376684678494625068451888284e-14), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.07443397096591141329212291707948432414e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.16665031056584124503224711639009530348e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27666060511630720485121299731204403783e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65646979169107732387032821262953301311e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.63594064986880863092994744424349361396e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31360114173642293100378020953197965181e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.09489929949457075237756409511944811481e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24574519309785870806550506199124944514e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56048486483867679310086683710523566607e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.60417286783794818094722636906776809193e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53154117367296710469692755461431646999e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60041713691072903334637560080298818163e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.77381528950794767694352468734042252745e-12), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 8) { + RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 2.5228e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.65057384221262866484014802392420311075e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.92801687242885330588201777283015178448e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.65508815862861196424333614846876229064e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71545573465295958468808641544341412235e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72077718130407940498710469661947719216e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.26299620525538984108147098966692839348e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77971404992990847565880351976461271350e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71176235845517643695464740679643640241e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.64603919225244695533557520384631958897e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.85274347406803894317891882905083368489e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48564096627181435612831469651920186491e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.90886715044580341917806394089282500340e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -6.39396206221935864416563232680283312796e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.37760675743046300528308203869876086823e-22), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49023284463742780238035958819642738891e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.76284367953836866133894756472541395734e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.69932155343422362573146811195224195135e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.97593541520549770519034085640975455763e-2), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45862809001322359249894968573830094537e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.61348135835522976885804369721316193713e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21069949470458047530981551232427019037e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.03132437580490629136144285669590192597e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91030348024641585284338958059030520141e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.56320479309161046934628280237629402373e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.39524198476052364627683067034422502163e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18666081063885228839052386515073873844e-13), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 9.6732e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.60044810497290557552736366450372523266e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27034183360438185616541260923634443241e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19813403884333707962156711479716066536e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91346554854771687970018076643044998737e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91975837766081548424458764226669789039e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26031304514411902758114277797443618334e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.47127194811140370123712253347211626753e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55248861254135821097921903190564312000e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78340847719683652633864722047250151066e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95888612422041337572422846394029849086e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66363005792960308636467394552324255493e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.93244800648299424751906591077496534948e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.95046217952146113063614290717113024410e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.46784746963816915795587433372284530785e-25), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.16012189991825507132967712656930682478e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95202772611563835130347051925062280272e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.23801477561401113332870463345197159418e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.54022665579711946784722766000062263305e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.94266182294627770206082679848878391116e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.11782839184878848480753630961211685630e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.28827067686094594197542725283923947812e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.00220719177374237332018587370837457299e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.42250513143925626748132661121749401409e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82007216963767723991309138907689681422e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34214834652884406013489167210936679359e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.85519293212465087373898447546710143008e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.96728437809303144188312623363453475831e-19), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + 
else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 1.0113e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54339461777955741686401041938275102207e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.17747085249877439037826121862689145081e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14104576580586095462211756659036062930e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.06903778663262313120049231822412184382e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53115958954246158081703822428768781010e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48225007017630665357941682179157662142e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20810829523286181556951002345409843125e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.54070972719909957155251432996372246019e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06258623970363729581390609798632080752e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15603641527498625694677136504611545743e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.05376970060354261667000502105893106009e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.14727542705613448694396750352455931731e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76883960167449461476228984331517762578e-25), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.03558202009465610972808653993060437679e-29), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08809672969012756295937194823378109391e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.41148083436617376855422685448827300528e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.85101541143091590863368934606849033688e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.38984899982960112626157576750593711628e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51437845497783812562009857096371643785e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.12891276596072815764119699444334380521e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82412500887161687329929693518498698716e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80215715026891688444965605768621763721e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.85838684678780184082810752634454259831e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83675729736846176693608812315852523556e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.80347165008408134158968403924819637224e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23639219622240634094606955067799349447e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.63446235885036169537726818244420509024e-23), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 9.7056e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.55085695067883584460317653567009454037e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52919532248638251721278667010429548877e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06266842295477991789450356745903177571e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.20671609948319334255323512011575892813e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04692714549374449244320605137676408001e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70605481454469287545965803970738264158e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83960996572005209177458712170004097587e-12), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29732261733491885750067029092181853751e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.78385693918239619309147428897790440735e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52969197316398995616879018998891661712e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14063120299947677255281707434419044806e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25957675329657493245893497219459256248e-25), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.55238112862817593053765898004447484717e-29), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -8.93970406521541790658675747195982964585e-34), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04722757068068234153968603374387493579e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85854131835804458353300285777969427206e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.85809281481040288085436275150792074968e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.38860750164285700051427698379841626305e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.91463283601681120487987016215594255423e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28104952818420195583669572450494959042e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43912720109615655035554724090181888734e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10668954229813492117417896681856998595e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.65093571330749369067212003571435698558e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81758227619561958470583781325371429458e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.36970757752002915423191164330598255294e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.06487673393164724939989217811068656932e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.47121057452822097779067717258050172115e-27), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else { + RealType t = 1 / sqrt(x); + + // Rational Approximation + // Maximum Relative Error: 7.1032e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[8] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99471140200716338969973029967190934238e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.82846732476244747063962056024672844211e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -3.69724475658159099827638225237895868258e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21259630917863228526439367416146293173e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.13469812721679130825429547254346177005e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.73237434182338329541631611908947123606e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72986150007117100707304201395140411630e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -2.53567129749337040254350979652515879881e-7), + }; + BOOST_MATH_STATIC const RealType Q[9] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.89815449697874475254942178935516387239e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.21223228867921988134838870379132038419e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79514417558927397512722128659468888701e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.43331254539687594239741585764730095049e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.99078779616201786316256750758748178864e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04590833634768023225748107112347131311e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.17497990182339853998751740288392648984e-5), + 
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53420609011698705803549938558385779137e-6),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t / x;
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>& tag) {
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    return saspoint5_pdf_plus_imp_prec<RealType>(abs(x), tag);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_pdf_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>& tag) {
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    return saspoint5_pdf_plus_imp_prec<RealType>(abs(x), tag);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_pdf_imp(const saspoint5_distribution<RealType, Policy>& dist, const RealType& x) {
+    //
+    // This calculates the pdf of the Saspoint5 distribution.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::pdf(saspoint5<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The SaS point5 distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128-bit quad-floats)");
+
+    RealType u = (x - location) / scale;
+
+    result = saspoint5_pdf_imp_prec<RealType>(u, tag_type()) / scale;
+
+    return result;
+}
+
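For context, a minimal usage sketch of the density entry point above. This is not part of the patch; it assumes only the public saspoint5_distribution constructor taking (location, scale) and the standard Boost.Math non-member pdf():

    #include <boost/math/distributions/saspoint5.hpp>
    #include <iostream>

    int main() {
        // Symmetric alpha-stable distribution with alpha = 1/2,
        // location 0 and scale 1 (assumed constructor arguments).
        boost::math::saspoint5_distribution<double> dist(0.0, 1.0);
        // pdf() forwards to saspoint5_pdf_imp above, which selects the
        // 53- or 113-bit rational approximation via tag dispatch.
        std::cout << boost::math::pdf(dist, 1.0) << '\n';
    }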
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (x < 0.5) {
+        // Rational Approximation
+        // Maximum Relative Error: 2.6225e-17
+        BOOST_MATH_STATIC const RealType P[16] = {
+            static_cast<RealType>(5.0e-1),
+            static_cast<RealType>(1.11530082549581486148e2),
+            static_cast<RealType>(1.18564167533523512811e4),
+            static_cast<RealType>(7.51503793077701705413e5),
+            static_cast<RealType>(3.05648233678438482191e7),
+            static_cast<RealType>(8.12176734530090957088e8),
+            static_cast<RealType>(1.39533182836234507573e10),
+            static_cast<RealType>(1.50394359286077974212e11),
+            static_cast<RealType>(9.79057903542935575811e11),
+            static_cast<RealType>(3.73800992855150140014e12),
+            static_cast<RealType>(8.12697090329432868343e12),
+            static_cast<RealType>(9.63154058643818290870e12),
+            static_cast<RealType>(5.77714904017642642181e12),
+            static_cast<RealType>(1.53321958252091815685e12),
+            static_cast<RealType>(1.36220966258718212359e11),
+            static_cast<RealType>(1.70766655065405022702e9),
+        };
+        BOOST_MATH_STATIC const RealType Q[16] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(2.24333404643898143947e2),
+            static_cast<RealType>(2.39984636687021023600e4),
+            static_cast<RealType>(1.53353791432086858132e6),
+            static_cast<RealType>(6.30764952479861776476e7),
+            static_cast<RealType>(1.70405769169309597488e9),
+            static_cast<RealType>(3.00381227010195289341e10),
+            static_cast<RealType>(3.37519046677507392667e11),
+            static_cast<RealType>(2.35001610518109063314e12),
+            static_cast<RealType>(9.90961948200767679416e12),
+            static_cast<RealType>(2.47066673978544828258e13),
+            static_cast<RealType>(3.51442593932882610556e13),
+            static_cast<RealType>(2.68891431106117733130e13),
+            static_cast<RealType>(9.99723484253582494535e12),
+            static_cast<RealType>(1.49190229409236772612e12),
+            static_cast<RealType>(5.68752980146893975323e10),
+        };
+
+        result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x);
+    }
+    else if (x < 1) {
+        RealType t = x - static_cast<RealType>(0.5);
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.2135e-19
+        BOOST_MATH_STATIC const RealType P[8] = {
+            static_cast<RealType>(3.31309550000758082456e-1),
+            static_cast<RealType>(1.63012162307622129396e0),
+            static_cast<RealType>(2.97763161467248770571e0),
+            static_cast<RealType>(2.49277948739575294031e0),
+            static_cast<RealType>(9.49619262302649586821e-1),
+            static_cast<RealType>(1.38360148984087584165e-1),
+            static_cast<RealType>(4.00812864075652334798e-3),
+            static_cast<RealType>(-4.82051978765960490940e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(5.43565383128046471592e0),
+            static_cast<RealType>(1.13265160672130133152e1),
+            static_cast<RealType>(1.13352316246726435292e1),
+            static_cast<RealType>(5.56671465170409694873e0),
+            static_cast<RealType>(1.21011708389501479550e0),
+            static_cast<RealType>(8.34618282872428849500e-2),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 2) {
+        RealType t = x - 1;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.4688e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(2.71280312689343248819e-1),
+            static_cast<RealType>(7.44610837974139249205e-1),
+            static_cast<RealType>(7.17844128359406982825e-1),
+            static_cast<RealType>(2.98789060945288850507e-1),
+            static_cast<RealType>(5.22747411439102272576e-2),
+            static_cast<RealType>(3.06447984437786430265e-3),
+            static_cast<RealType>(2.60407071021044908690e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.06221257507188300824e0),
+            static_cast<RealType>(3.44827372231472308047e0),
+            static_cast<RealType>(1.78166113338930668519e0),
+            static_cast<RealType>(4.25580478492907232687e-1),
+            static_cast<RealType>(4.09983847731128510426e-2),
+            static_cast<RealType>(1.04343172183467651240e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 4) {
+        RealType t = x - 2;
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.2289e-18
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(2.13928162275383716645e-1),
+            static_cast<RealType>(2.35139109235828185307e-1),
+            static_cast<RealType>(9.35967515134932733243e-2),
+            static_cast<RealType>(1.64310489592753858417e-2),
+            static_cast<RealType>(1.23186728989215889119e-3),
+            static_cast<RealType>(3.13500969261032539402e-5),
+            static_cast<RealType>(1.17021346758965979212e-7),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.28212183177829510267e0),
+            static_cast<RealType>(6.17321009406850420793e-1),
+            static_cast<RealType>(1.38400318019319970893e-1),
+            static_cast<RealType>(1.44994794535896837497e-2),
+            static_cast<RealType>(6.17774446282546623636e-4),
+            static_cast<RealType>(7.00521050169239269819e-6),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 8) {
+        RealType t = x - 4;
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.7284e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(1.63772802979087193656e-1),
+            static_cast<RealType>(9.69009603942214234119e-2),
+            static_cast<RealType>(2.08261725719828138744e-2),
+            static_cast<RealType>(1.97965182693146960970e-3),
+            static_cast<RealType>(8.05499273532204276894e-5),
+            static_cast<RealType>(1.11401971145777879684e-6),
+            static_cast<RealType>(2.25932082770588727842e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(6.92463563872865541733e-1),
+            static_cast<RealType>(1.80720987166755982366e-1),
+            static_cast<RealType>(2.20416647324531054557e-2),
+            static_cast<RealType>(1.26052070140663063778e-3),
+            static_cast<RealType>(2.93967534265875431639e-5),
+            static_cast<RealType>(1.82706995042259549615e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 16) {
+        RealType t = x - 8;
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.9609e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(1.22610122564874280532e-1),
+            static_cast<RealType>(3.70273222121572231593e-2),
+            static_cast<RealType>(4.06083618461789591121e-3),
+            static_cast<RealType>(1.96898134215932126299e-4),
+            static_cast<RealType>(4.08421066512186972853e-6),
+            static_cast<RealType>(2.87707419853226244584e-8),
+            static_cast<RealType>(2.96850126180387702894e-11),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.55825191301363023576e-1),
+            static_cast<RealType>(4.77251766176046719729e-2),
+            static_cast<RealType>(2.99136605131226103925e-3),
+            static_cast<RealType>(8.78895785432321899939e-5),
+            static_cast<RealType>(1.05235770624006494709e-6),
+            static_cast<RealType>(3.35423877769913468556e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 32) {
+        RealType t = x - 16;
+
+        // Rational Approximation
+        // Maximum Relative Error: 5.6559e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(9.03056141356415077080e-2),
+            static_cast<RealType>(1.37568904417652631821e-2),
+            static_cast<RealType>(7.60947271383247418831e-4),
+            static_cast<RealType>(1.86048302967560067128e-5),
+            static_cast<RealType>(1.94537860496575427218e-7),
+            static_cast<RealType>(6.90524093915996283104e-10),
+            static_cast<RealType>(3.58808434477817122371e-13),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(1.80501347735272292079e-1),
+            static_cast<RealType>(1.22807958286146936376e-2),
+            static_cast<RealType>(3.90421541115275676253e-4),
+            static_cast<RealType>(5.81669449234915057779e-6),
+            static_cast<RealType>(3.53005415676201803667e-8),
+            static_cast<RealType>(5.69883025435873921433e-11),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else if (x < 64) {
+        RealType t = x - 32;
+
+        // Rational Approximation
+        // Maximum Relative Error: 6.0653e-17
+        BOOST_MATH_STATIC const RealType P[7] = {
+            static_cast<RealType>(6.57333571766941474226e-2),
+            static_cast<RealType>(5.02795551798163084224e-3),
+            static_cast<RealType>(1.39633616037997111325e-4),
+            static_cast<RealType>(1.71386564634533872559e-6),
+            static_cast<RealType>(8.99508156357247137439e-9),
+            static_cast<RealType>(1.60229460572297160486e-11),
+            static_cast<RealType>(4.17711709622960498456e-15),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(9.10198637347368265508e-2),
+            static_cast<RealType>(3.12263472357578263712e-3),
+            static_cast<RealType>(5.00524795130325614005e-5),
+            static_cast<RealType>(3.75913188747149725195e-7),
+            static_cast<RealType>(1.14970132098893394023e-9),
+            static_cast<RealType>(9.34957119271300093120e-13),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        RealType t = 1 / sqrt(x);
+
+        // Rational Approximation
+        // Maximum Relative Error: 2.0104e-20
+        BOOST_MATH_STATIC const RealType P[5] = {
+            static_cast<RealType>(3.98942280401432677940e-1),
+            static_cast<RealType>(8.12222388783621449146e-2),
+            static_cast<RealType>(1.68515703707271703934e-2),
+            static_cast<RealType>(2.19801627205374824460e-3),
+            static_cast<RealType>(-5.63321705854968264807e-5),
+        };
+        BOOST_MATH_STATIC const RealType Q[5] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(6.02536240902768558315e-1),
+            static_cast<RealType>(1.99284471400121092380e-1),
+            static_cast<RealType>(3.48012577961755452113e-2),
+            static_cast<RealType>(3.38545004473058881799e-3),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t;
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_cdf_plus_imp_prec(const RealType& x, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
RealType result; + + if (x < 0.125) { + // Rational Approximation + // Maximum Relative Error: 6.9340e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[30] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.0e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.25520067710293108163697513129883130648e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.70866020657515874782126804139443323023e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.00865235319309486225795793030882782077e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.15226363537737769449645357346965170790e10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.90371247243851280277289046301838071764e12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.55124590509169425751300134399513503679e14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.31282020412787511681760982839078664474e16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.81134278666896523873256421982740565131e18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.36154530125229747305141034242362609073e20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.67793867640429875837167908549938345465e22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34584264816825205490037614178084070903e24), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.52622279567059369718208827282730379468e25), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.84678324511679577282571711018484545185e27), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.99412564257799793932936828924325638617e28), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.08467105431111959283045453636520222779e30), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87466808926544728702827204697734995611e31), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55020252231174414164534905191762212055e32), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69582736077420504345389671165954321163e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.18203860972249826626461130638196586188e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.32955733788770318392204091471121129386e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.97972270315674052071792562126668438695e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93941537398987201071027348577636994465e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40818708062034138095495206258366082481e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.76833406751769751643745383413977973530e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.67873467711368838525239991688791162617e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.94179310584115437584091984619858795365e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24348215908456320362232906012152922949e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71625432346533320597285660433110657670e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.54662474187354179772157464533408058525e33), + }; + BOOST_MATH_STATIC const RealType Q[31] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05231337496532137901354609636674085703e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.43071888317491317900094470796567113997e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.80864482202910830302921131771345102044e8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.32755297215862998181755216820621285536e10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.86251123527611073428156549377791985741e12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.12025543961949466786297141758805461421e15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27681657695574252637426145112570596483e17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17850553865715973904162289375819555884e19), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.87285897504702686250962844939736867339e20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.46852796231948446334549476317560711795e22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76101689878844725930808096548998198853e24), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14017776727845251567032313915953239178e26), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83738954971390158348334918235614003163e27), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04701345216121451992682705965658316871e29), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29994190638467725374533751141434904865e30), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.03334024242845994501493644478442360593e31), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.59094378123268840693978620156028975277e32), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.05630254163426327113368743426054256780e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.06008195534030444387061989883493342898e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.21262490304347036689874956206774563906e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52353024633841796119920505314785365242e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28839143293381125956284415313626962263e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31074057704096457802547386358094338369e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24762412200364040971704861346921094354e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.55663903116458425420509083471048286114e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81839283802391753865642022579846918253e37), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.40026559327708207943879092058654410696e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48767474646810049293505781106444169229e36), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10591353097667671736865938428051885499e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26980708896893794012677171239610721832e33), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, x) / tools::evaluate_polynomial(Q, x); + } + else if (x < 0.25) { + RealType t = x - static_cast (0.125); + + // Rational Approximation + // Maximum Relative Error: 9.6106e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.31887921568009055676985827521151969069e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62791448964529380666250180886203090183e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.18238045199893937316918299064825702894e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.77274519306540522227493503092956314136e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.89424638466340765479970877448972418958e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.84027004420207996285174223581748706097e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.84633134142285937075423713704784530853e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76780579189423063605715733542379494552e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19812409802969581112716039533798357401e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60039008588877024309600768114757310858e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.10268260529501421009222937882726290612e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.72169594688819848498039471657587836720e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27181379647139697258984772894869505788e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.73617450590346508706222885401965820190e1), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.18558935411552146390814444666395959919e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49210559503096368944407109881023223654e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93959323596111340518285858313038058302e4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53590607436758691037825792660167970938e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.82700985983018132572589829602100319330e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62137033935442506086127262036686905276e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.76014299715348555304267927238963139228e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.12336796972134088340556958396544477713e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.01952132024838508233050167059872220508e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.41846547214877387780832317250797043384e6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.02083431572388097955901208994308271581e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.30401057171447074343957754855656724141e4), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 0.5) { + RealType t = x - static_cast (0.25); + + // Rational Approximation + // Maximum Relative Error: 3.1519e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.87119665000174806422420129219814467874e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.60769554551148293079169764245570645155e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.49181979810834706538329284478129952168e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15722765491675871778645250624425739489e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.65973147084701923411221710174830072860e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93709338011482232037110656459951914303e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.57393131299425403017769538642434714791e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.24110491141294379107651487490031694257e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23670394514211681515965192338544032862e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06141024932329394052395469123628405389e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.08599362073145455095790192415468286304e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.22746783794652085925801188098270888502e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.19652234873414609727168969049557770989e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -5.73529976407853894192156335785920329181e-4), + }; + BOOST_MATH_STATIC const RealType Q[13] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.04157569499592889296640733909653747983e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82248883130787159161541119440215325308e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.35191216924911901198168794737654512677e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05099314677808235578577204150229855903e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.61081069236463123032873733048661305746e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.65340645555368229718826047069323437201e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.44526681322128674428653420882660351679e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.03963804195353853550682049993122898950e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40399236835577953127465726826981753422e3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.46764755170079991793106428011388637748e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
6.06384106042490712972156545051459068443e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.27614406724572981099586665536543423891e0), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 1) { + RealType t = x - static_cast (0.5); + + // Rational Approximation + // Maximum Relative Error: 7.1196e-37 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[15] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31309550000758082761278726632760756847e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.07242222531117199094690544171275415854e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.18286763141875580859241637334381199648e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72102024869298528501604761974348686708e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.31748399999514540052066169132819656757e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.72168003284748405703923567644025252608e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52648991506052496046447777354251378257e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.16777263528764704804758173026143295383e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.66044453196259367950849328889468385159e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.35095952392355288307377427145581700484e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43011308494452327007589069222668324337e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16582092138863383294685790744721021189e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.02261914949200575965813000131964695720e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.93943913630044161720796150617166047233e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, -9.76395009419307902351328300308365369814e-8), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28073115520716780203055949058270715651e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.20245752585870752942356137496087189194e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.34548337034735803039553186623067144497e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.90925817267776213429724248532378895039e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.92883822651628140083115301005227577059e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.72868136219107985834601503784789993218e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.50498744791568911029110559017896701095e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05178276667813671578581259848923964311e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.16880263792490095344135867620645018480e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16199396397514668672304602774610890666e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.25543193822942088303609988399416145281e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.62522294286034117189844614005500278984e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18342889744790118595835138444372660676e-3), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 2) { + RealType t = x - 1; + + // Rational Approximation + // Maximum Relative Error: 9.5605e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71280312689343266367958859259591541365e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49628636612698702680819948707479820292e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61090930375686902075245639803646265081e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
5.01191924051756106307211298794294657688e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.42496510376427957390465373165464672088e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59577769624139820954046058289100998534e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02664809521258420718170586857797408674e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72258711278476951299824066502536249701e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.72578941800687566921553416498339481887e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.12553368488232553360765667155702324159e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.20749770911901442251726681861858323649e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00621121212654384864006297569770703900e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18976033102817074104109472578202752346e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22093548539863254922531707899658394458e-10), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.83305694892673455436552817409325835774e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.49922543669955056754932640312490112609e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.23488972536322019584648241457582608908e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.14051527527038669918848981363974859889e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37891280136777182304388426277537358346e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.08058775103864815769223385606687612117e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83305488980337433132332401784292281716e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.71072208215804671719811563659227630554e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.86332040813989094594982937011005305263e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.87698178237970337664105782546771501188e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69113555019737313680732855691540088318e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.31888539972217875242352157306613891243e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85633766164682554126992822326956560433e-8), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 4) { + RealType t = x - 2; + + // Rational Approximation + // Maximum Relative Error: 1.1494e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.13928162275383718405630406427822960090e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.90742307267701162395764574873947997211e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.13821826367941514387521090205756466068e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.96186879146063565484800486550739025293e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19438785955706463753454881511977831603e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.44969124820016994689518539612465708536e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27841835070651018079759230944461773079e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69952429132675045239242077293594666305e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47919853099168659881487026035933933068e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.04644774117864306055402364094681541437e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.52604718870921084048756263996119841957e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97610950633031564892821158058978809537e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35934159016861180185992558083703785765e-11), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21044098237798939057079316997065892072e-14), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.94437702178976797218081686254875998984e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.82068837586514484653828718675654460991e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87606058269189306593797764456467061128e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39130528408903116343256483948950693356e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.52792074489091396425713962375223436022e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00892233011840867583848470677898363716e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.53717105060592851173320646706141911461e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57293857675930200001382624769341451561e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04219251796696135508847408131139677925e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.20053006131133304932740325113068767057e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.26384707028090985155079342718673255493e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.19146442700994823924806249608315505708e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59879298772002950043508762057850408213e-12), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 8) { + RealType t = x - 4; + + // Rational Approximation + // Maximum Relative Error: 1.9710e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63772802979087199762340235165979751298e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31941534705372320785274994658709390116e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.43643960022585762678456016437621064500e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.11415302325466272779041471612529728187e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15767742459744253874067896740220951622e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.74049309186016489825053763513176160256e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.76349898574685150849080543168157785281e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19757202370729036627932327405149840205e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.32067727965321839898287320520750897894e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47636340015260789807543414080472136575e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36236727340568181129875213546468908164e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89379573960280486883733996547662506245e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.71832232038263988173042637335112603365e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.72380245500539326441037770757072641975e-18), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51702400281458104713682413542736419584e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01375164846907815766683647295932603968e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.92796007869834847612192314006582598557e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.77580441164023725582659445614058463183e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63592331843149724480258804892989851727e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.87334158717610115008450674967492650941e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46596056941432875244263245821845070102e-4), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.69980051560936361597177347949112822752e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.61690034211585843423761830218320365457e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.40619773800285766355596852314940341504e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.10624533319804091814643828283820958419e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.10009654621246392691126133176423833259e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.48070591106986983088640496621926852293e-16), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 16) { + RealType t = x - 8; + + // Rational Approximation + // Maximum Relative Error: 5.2049e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22610122564874286614786819620499101143e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.30352481858382230273216195795534959290e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.45933050053542949214164590814846222512e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.18776888646200567321599584635465632591e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53421996228923143480455729204878676265e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.26075686557831306993734433164305349875e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58045762501721375879877727645933749122e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13469629033419341069106781092024950086e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09157226556088521407323375433512662525e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44961350323527660188267669752380722085e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11052101325523147964890915835024505324e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.26421404354976214191891992583151033361e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.17133505681224996657291059553060754343e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41010917905686427164414364663355769988e-22), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.31062773739451672808456319166347015167e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35386721434011881226168110614121649232e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.39357338312443465616015226804775178232e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.26630144036271792027494677957363535353e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.24340476859846183414651435036807677467e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.33917136421389571662908749253850939876e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.80972141456523767244381195690041498939e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22653625120465488656616983786525028119e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.64072452032620505897896978124863889812e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.61842001579321492488462230987972104386e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96631619425501661980194304605724632777e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.80392324086028812772385536034034039168e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.34254502871215949266781048808984963366e-20), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 32) { + RealType t = x - 16; + + // Rational Approximation + // Maximum Relative Error: 1.7434e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { 
+ BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.03056141356415128156562790092782153630e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.15331583242023443256381237551843296356e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.81847913073640285776566199343276995613e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.23578443960486030170636772457627141406e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36906354316016270165240908809929957836e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.80584421020238085239890207672296651219e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.20726437845755296397071540583729544203e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.71042563703818585243207722641746283288e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08307373360265947158569900625482137206e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.80776566500233755365518221977875432763e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11405351639704510305055492207286172753e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21575609293568296049921888011966327905e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66081982641748223969990279975752576675e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96483118060215455299182487430511998831e-26), + }; + BOOST_MATH_STATIC const RealType Q[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.77347004038951368607085827825968614455e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.27559305780716801070924630708599448466e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.05452382624230160738008550961679711827e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.75369667590360521677018734348769796476e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56551290985905942229892419848093494661e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.46972102361871185271727958608184616388e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.72423917010499649257775199140781647069e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14337306905269302583746182007852069459e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.69949885309711859563395555285232232606e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.00318719634300754237920041312234711548e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41139664927184402637020651515172315287e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79013190225240505774959477465594797961e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.46536966503325413797061462062918707370e-24), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); + } + else if (x < 64) { + RealType t = x - 32; + + // Rational Approximation + // Maximum Relative Error: 2.0402e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[14] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.57333571766941514095434647381791040479e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15416685251021339933358981066948923001e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.86806753164417557035166075399588122481e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.91972616817770660098405128729991574724e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10225768760715861978198010761036882002e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.05986998674039047865566990469266534338e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.59572646670205456333051888086612875871e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19347294198055585461131949159508730257e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21328285448498841418774425071549974153e-14), + 
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.19331596847283822557042655221763459728e-17),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.36128017817576942059191451016251062072e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.35600223942735523925477855247725326228e-23),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.14658948592500290756690769268766876322e-26),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.86984591055448991335081550609451649866e-30),
+        };
+        BOOST_MATH_STATIC const RealType Q[14] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90112824856612652807095815199496602262e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59291524937386142936420775839969648652e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.74245361925275011235694006013677228467e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41828589449615478387532599798645159282e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.08088176420557205743676774127863572768e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.30760429417424419297000535744450830697e-9),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.18464910867914234357511605329900284981e-11),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.74255540513281299503596269087176674333e-13),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.02616028440371294233330747672966435921e-15),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.26276597941744408946918920573146445795e-18),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.47463109867603732992337779860914933775e-21),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.77217411888267832243050973915295217582e-24),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.67397425207383164084527830512920206074e-28),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t);
+    }
+    else {
+        RealType t = 1 / sqrt(x);
+
+        // Rational Approximation
+        // Maximum Relative Error: 9.2612e-35
+        // LCOV_EXCL_START
+        BOOST_MATH_STATIC const RealType P[9] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.98942280401432677939946059934381868476e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33908701314796522684603310107061150444e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.92120397142832495974006972404741124398e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.15463147603421962834297353867930971657e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44488751006069172847577645328482300099e-4),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44057582804743599116332797864164802887e-5),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.02968018188491417839349438941039867033e-6),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, -4.75092244933846337077999183310087492887e-7),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35099582728548602389917143511323566818e-8),
+        };
+        BOOST_MATH_STATIC const RealType Q[7] = {
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.34601617336219074065534356705298927390e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.82954035780824611941899463895040327299e-1),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.70929001162671283123255408612494541378e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.05508596604210030533747793197422815105e-2),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02913299057943756875992272236063124608e-3),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.37824426836648736125759177846682556245e-5),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t) * t;
+    }
+
+    return result;
+}
+
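Every branch above reduces to a rational approximant P(t)/Q(t) evaluated with tools::evaluate_polynomial, which expects coefficients ordered from the constant term upward. A minimal sketch of the underlying Horner recurrence, for orientation only (Boost's actual implementation unrolls the loop for fixed coefficient counts):

    #include <cstddef>

    // Horner form: c[0] + t*(c[1] + t*(c[2] + ...)), matching the
    // constant-term-first ordering of the P[] and Q[] tables above.
    template <typename Real, std::size_t N>
    Real horner(const Real (&c)[N], Real t) {
        Real r = c[N - 1];
        for (std::size_t i = N - 1; i > 0; --i) {
            r = r * t + c[i - 1];
        }
        return r;
    }
    // A rational approximant is then horner(P, t) / horner(Q, t).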
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_cdf_imp_prec(const RealType& x, bool complement, const boost::math::integral_constant<int, 53>& tag) {
+    if (x >= 0) {
+        return complement ? saspoint5_cdf_plus_imp_prec<RealType>(x, tag) : 1 - saspoint5_cdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return complement ? 1 - saspoint5_cdf_plus_imp_prec<RealType>(-x, tag) : saspoint5_cdf_plus_imp_prec<RealType>(-x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_cdf_imp_prec(const RealType& x, bool complement, const boost::math::integral_constant<int, 113>& tag) {
+    if (x >= 0) {
+        return complement ? saspoint5_cdf_plus_imp_prec<RealType>(x, tag) : 1 - saspoint5_cdf_plus_imp_prec<RealType>(x, tag);
+    }
+    else if (x <= 0) {
+        return complement ? 1 - saspoint5_cdf_plus_imp_prec<RealType>(-x, tag) : saspoint5_cdf_plus_imp_prec<RealType>(-x, tag);
+    }
+    else {
+        return boost::math::numeric_limits<RealType>::quiet_NaN();
+    }
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_cdf_imp(const saspoint5_distribution<RealType, Policy>& dist, const RealType& x, bool complement) {
+    //
+    // This calculates the cdf of the Saspoint5 distribution and/or its complement.
+    //
+
+    BOOST_MATH_STD_USING // for ADL of std functions
+    constexpr auto function = "boost::math::cdf(saspoint5<%1%>&, %1%)";
+    RealType result = 0;
+    RealType location = dist.location();
+    RealType scale = dist.scale();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_x(function, x, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The SaS point5 distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128-bit quad-floats)");
+
+    RealType u = (x - location) / scale;
+
+    result = saspoint5_cdf_imp_prec<RealType>(u, complement, tag_type());
+
+    return result;
+}
+
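A corresponding usage sketch for the CDF wrapper above. Again, this is not part of the patch; it assumes the standard Boost.Math non-member cdf()/complement() interface:

    #include <boost/math/distributions/saspoint5.hpp>
    #include <iostream>

    int main() {
        boost::math::saspoint5_distribution<double> dist(0.0, 1.0);
        double x = 2.0;
        double F = boost::math::cdf(dist, x);                          // P(X <= x)
        // The complemented form routes through saspoint5_cdf_imp with
        // complement == true, avoiding cancellation in 1 - F for large x.
        double S = boost::math::cdf(boost::math::complement(dist, x)); // P(X > x)
        std::cout << F << ' ' << S << '\n';
    }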
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant<int, 53>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (ilogb(p) >= -2) {
+        RealType u = -log2(ldexp(p, 1));
+
+        if (u < 0.125) {
+            // Rational Approximation
+            // Maximum Relative Error: 4.2616e-17
+            BOOST_MATH_STATIC const RealType P[13] = {
+                static_cast<RealType>(1.36099130643975127045e-1),
+                static_cast<RealType>(2.19634434498311523885e1),
+                static_cast<RealType>(1.70276954848343179287e3),
+                static_cast<RealType>(8.02187341786354339306e4),
+                static_cast<RealType>(2.48750112198456813443e6),
+                static_cast<RealType>(5.20617858300443231437e7),
+                static_cast<RealType>(7.31202030685167303439e8),
+                static_cast<RealType>(6.66061403138355591915e9),
+                static_cast<RealType>(3.65687892725590813998e10),
+                static_cast<RealType>(1.06061776220305595494e11),
+                static_cast<RealType>(1.23930642673461465346e11),
+                static_cast<RealType>(1.49986408149520127078e10),
+                static_cast<RealType>(-6.17325587219357123900e8),
+            };
+            BOOST_MATH_STATIC const RealType Q[13] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(1.63111146753825227716e2),
+                static_cast<RealType>(1.27864461509685444043e4),
+                static_cast<RealType>(6.10371533241799228037e5),
+                static_cast<RealType>(1.92422115963507708309e7),
+                static_cast<RealType>(4.11544185502250709497e8),
+                static_cast<RealType>(5.95343302992055062258e9),
+                static_cast<RealType>(5.65615858889758369947e10),
+                static_cast<RealType>(3.30833154992293143503e11),
+                static_cast<RealType>(1.06032392136054207216e12),
+                static_cast<RealType>(1.50071282012095447931e12),
+                static_cast<RealType>(5.43552396263989180433e11),
+                static_cast<RealType>(9.57434915768660935004e10),
+            };
+
+            result = u * tools::evaluate_polynomial(P, u) / (tools::evaluate_polynomial(Q, u) * (p * p));
+        }
+        else if (u < 0.25) {
+            RealType t = u - static_cast<RealType>(0.125);
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.3770e-19
+            BOOST_MATH_STATIC const RealType P[8] = {
+                static_cast<RealType>(1.46698650748920243698e-2),
+                static_cast<RealType>(3.58380131788385557227e-1),
+                static_cast<RealType>(3.39153750029553194566e0),
+                static_cast<RealType>(1.55457424873957272207e1),
+                static_cast<RealType>(3.44403897039657057261e1),
+                static_cast<RealType>(3.01881531964962975320e1),
+                static_cast<RealType>(2.77679052294606319767e0),
+                static_cast<RealType>(-7.76665288232972435969e-2),
+            };
+            BOOST_MATH_STATIC const RealType Q[7] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(1.72584280323876188464e1),
+                static_cast<RealType>(1.11983518800147654866e2),
+                static_cast<RealType>(3.25969893054048132145e2),
+                static_cast<RealType>(3.91978809680672051666e2),
+                static_cast<RealType>(1.29874252720714897530e2),
+                static_cast<RealType>(2.08740114519610102248e1),
+            };
+
+            result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+        }
+        else if (u < 0.5) {
+            RealType t = u - static_cast<RealType>(0.25);
+
+            // Rational Approximation
+            // Maximum Relative Error: 9.2445e-18
+            BOOST_MATH_STATIC const RealType P[8] = {
+                static_cast<RealType>(2.69627866689346445458e-2),
+                static_cast<RealType>(3.23091180507445216811e-1),
+                static_cast<RealType>(1.42164019533549860681e0),
+                static_cast<RealType>(2.74613170828120023406e0),
+                static_cast<RealType>(2.07865023346180997996e0),
+                static_cast<RealType>(2.53267176863740856907e-1),
+                static_cast<RealType>(-2.55816250186301841152e-2),
+                static_cast<RealType>(3.02683750470398342224e-3),
+            };
+            BOOST_MATH_STATIC const RealType Q[6] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(8.55049920135376003042e0),
+                static_cast<RealType>(2.48726119139047911316e1),
+                static_cast<RealType>(2.79519589592198994574e1),
+                static_cast<RealType>(9.88212916161823866098e0),
+                static_cast<RealType>(1.39749417956251951564e0),
+            };
+
+            result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+        }
+        else {
+            RealType t = u - static_cast<RealType>(0.5);
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.2918e-20
+            BOOST_MATH_STATIC const RealType P[9] = {
+                static_cast<RealType>(4.79518653373241051274e-2),
+                static_cast<RealType>(3.81837125793765918564e-1),
+                static_cast<RealType>(1.13370353708146321188e0),
+                static_cast<RealType>(1.55218145762186846509e0),
+                static_cast<RealType>(9.60938271141036509605e-1),
+                static_cast<RealType>(2.11811755464425606950e-1),
+                static_cast<RealType>(8.84533960603915742831e-3),
+                static_cast<RealType>(1.73314614571009160225e-3),
+                static_cast<RealType>(-3.63491208733876986098e-5),
+            };
+            BOOST_MATH_STATIC const RealType Q[8] = {
+                static_cast<RealType>(1.),
+                static_cast<RealType>(6.36954463000253710936e0),
+                static_cast<RealType>(1.40601897306833147611e1),
+                static_cast<RealType>(1.33838075106916667084e1),
+                static_cast<RealType>(5.60958095533108032859e0),
+                static_cast<RealType>(1.11796035623375210182e0),
+                static_cast<RealType>(1.12508482637488861060e-1),
+                static_cast<RealType>(5.18503975949799718538e-3),
+            };
+
+            result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+        }
+    }
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 2));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.2057e-18
+        BOOST_MATH_STATIC const RealType P[10] = {
+            static_cast<RealType>(8.02395484493329835881e-2),
+            static_cast<RealType>(2.46132933068351274622e-1),
+            static_cast<RealType>(2.81820176867119231101e-1),
+            static_cast<RealType>(1.47754061028371025893e-1),
+            static_cast<RealType>(3.54638964490281023406e-2),
+            static_cast<RealType>(3.99998730093393774294e-3),
+            static_cast<RealType>(3.81581928434827040262e-4),
+            static_cast<RealType>(1.82520920154354221101e-5),
+    else if (ilogb(p) >= -4) {
+        RealType t = -log2(ldexp(p, 2));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.2057e-18
+        BOOST_MATH_STATIC const RealType P[10] = {
+            static_cast<RealType>(8.02395484493329835881e-2),
+            static_cast<RealType>(2.46132933068351274622e-1),
+            static_cast<RealType>(2.81820176867119231101e-1),
+            static_cast<RealType>(1.47754061028371025893e-1),
+            static_cast<RealType>(3.54638964490281023406e-2),
+            static_cast<RealType>(3.99998730093393774294e-3),
+            static_cast<RealType>(3.81581928434827040262e-4),
+            static_cast<RealType>(1.82520920154354221101e-5),
+            static_cast<RealType>(-2.06151396745690348445e-7),
+            static_cast<RealType>(6.77986548138011345849e-9),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(2.39244329037830026691e0),
+            static_cast<RealType>(2.12683465416376620896e0),
+            static_cast<RealType>(9.02612272334554457823e-1),
+            static_cast<RealType>(2.06667959191488815314e-1),
+            static_cast<RealType>(2.79328968525257867541e-2),
+            static_cast<RealType>(2.28216286216537879937e-3),
+            static_cast<RealType>(1.04195690531437767679e-4),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+    }
+    else if (ilogb(p) >= -8) {
+        RealType t = -log2(ldexp(p, 4));
+
+        // Rational Approximation
+        // Maximum Relative Error: 3.3944e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(1.39293493266195561875e-1),
+            static_cast<RealType>(1.26741380938661691592e-1),
+            static_cast<RealType>(4.31117040307200265931e-2),
+            static_cast<RealType>(7.50528269269498076949e-3),
+            static_cast<RealType>(8.63100497178570310436e-4),
+            static_cast<RealType>(6.75686286034521991703e-5),
+            static_cast<RealType>(3.11102625473120771882e-6),
+            static_cast<RealType>(9.63513655399980075083e-8),
+            static_cast<RealType>(-6.40223609013005302318e-11),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(8.11234548272888947555e-1),
+            static_cast<RealType>(2.63525516991753831892e-1),
+            static_cast<RealType>(4.77118226533147280522e-2),
+            static_cast<RealType>(5.46090741266888954909e-3),
+            static_cast<RealType>(4.15325425646862026425e-4),
+            static_cast<RealType>(2.02377681998442384863e-5),
+            static_cast<RealType>(5.79823311154876056655e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+    }
+    else if (ilogb(p) >= -16) {
+        RealType t = -log2(ldexp(p, 8));
+
+        // Rational Approximation
+        // Maximum Relative Error: 4.1544e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(1.57911660613037760235e-1),
+            static_cast<RealType>(5.59740955695099219682e-2),
+            static_cast<RealType>(8.92895854008560399142e-3),
+            static_cast<RealType>(8.88795299273855801726e-4),
+            static_cast<RealType>(5.66358335596607738071e-5),
+            static_cast<RealType>(2.46733195253941569922e-6),
+            static_cast<RealType>(6.44829870181825872501e-8),
+            static_cast<RealType>(7.62193242864380357931e-10),
+            static_cast<RealType>(-7.82035413331699873450e-14),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.49007782566002620811e-1),
+            static_cast<RealType>(5.65303702876260444572e-2),
+            static_cast<RealType>(5.54316442661801299351e-3),
+            static_cast<RealType>(3.58498995501703237922e-4),
+            static_cast<RealType>(1.53872913968336341278e-5),
+            static_cast<RealType>(4.08512152326482573624e-7),
+            static_cast<RealType>(4.72959615756470826429e-9),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+    }
+    else if (ilogb(p) >= -32) {
+        RealType t = -log2(ldexp(p, 16));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.5877e-18
+        BOOST_MATH_STATIC const RealType P[10] = {
+            static_cast<RealType>(1.59150086070234563099e-1),
+            static_cast<RealType>(6.07144002506911115092e-2),
+            static_cast<RealType>(1.10026443723891740392e-2),
+            static_cast<RealType>(1.24892739209332398698e-3),
+            static_cast<RealType>(9.82922518655171276487e-5),
+            static_cast<RealType>(5.58366837526347222893e-6),
+            static_cast<RealType>(2.29005408647580194007e-7),
+            static_cast<RealType>(6.44325718317518336404e-9),
+            static_cast<RealType>(1.05110361316230054467e-10),
+            static_cast<RealType>(1.48083450629432857655e-18),
+        };
+        BOOST_MATH_STATIC const RealType Q[9] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(3.81470315977341203351e-1),
+            static_cast<RealType>(6.91330250512167919573e-2),
+            static_cast<RealType>(7.84712209182587717077e-3),
+            static_cast<RealType>(6.17595479676821181012e-4),
+            static_cast<RealType>(3.50829361179041199953e-5),
+            static_cast<RealType>(1.43889153071571504712e-6),
+            static_cast<RealType>(4.04840254888235877998e-8),
+            static_cast<RealType>(6.60429636407045050112e-10),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+    }
+    else if (ilogb(p) >= -64) {
+        RealType t = -log2(ldexp(p, 32));
+
+        // Rational Approximation
+        // Maximum Relative Error: 8.7254e-17
+        BOOST_MATH_STATIC const RealType P[9] = {
+            static_cast<RealType>(1.59154943017783026201e-1),
+            static_cast<RealType>(6.91506515614472069475e-2),
+            static_cast<RealType>(1.44590186111155933843e-2),
+            static_cast<RealType>(1.92616138327724025421e-3),
+            static_cast<RealType>(1.79640147906775699469e-4),
+            static_cast<RealType>(1.30852535070639833809e-5),
+            static_cast<RealType>(5.55259657884038297268e-7),
+            static_cast<RealType>(3.50107118687544980820e-8),
+            static_cast<RealType>(-1.47102592933729597720e-22),
+        };
+        BOOST_MATH_STATIC const RealType Q[8] = {
+            static_cast<RealType>(1.),
+            static_cast<RealType>(4.34486357752330500669e-1),
+            static_cast<RealType>(9.08486933075320995164e-2),
+            static_cast<RealType>(1.21024289017243304241e-2),
+            static_cast<RealType>(1.12871233794777525784e-3),
+            static_cast<RealType>(8.22170725751776749123e-5),
+            static_cast<RealType>(3.48879932410650101194e-6),
+            static_cast<RealType>(2.19978790407451988423e-7),
+        };
+
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+    }
+    else {
+        result = 1 / (p * p * constants::two_pi<RealType>());
+    }
+
+    return result;
+}
+
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_quantile_upper_imp_prec(const RealType& p, const boost::math::integral_constant<int, 113>&)
+{
+    BOOST_MATH_STD_USING
+    RealType result;
+
+    if (ilogb(p) >= -2) {
+        RealType u = -log2(ldexp(p, 1));
+
+        if (u < 0.125) {
+            // Rational Approximation
+            // Maximum Relative Error: 2.5675e-36
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[31] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36099130643975133156293056139850872219e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.03940482189350763127508703926866548690e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00518276893354880480781640750482315271e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.55844903094077096941027360107304259099e6),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04507684135310729583474324660276395831e9),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28519957085041757616278379578781441623e11),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26054173986187219679917530171252145632e13),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00693075272502479915569708465960917906e15),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.64153695410984136395853200311209462775e16),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.64993034609287363745840801813540992383e18),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68080300629977787949474098413155901197e20),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.50632142671665246974634799849090331338e21),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11943753054362349397013211631038480307e23),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.80601829873419334580289886671478701625e24),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33441650581633426542372642262736818512e26),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.56279427934163518272441555879970370340e27),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.08899113985387092689705022477814364717e28),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.37750989391907347952902900750138805007e29),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.76961267256299304213687639380275530721e30),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.98417586455955659885944915688130612888e31),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.40932923796679251232655132670811114351e32),
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.80810239916688876216017180714744912573e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.23907429566810200929293428832485038147e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11441754640405256305951569489818422227e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.30534222360394829628175800718529342304e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.73301799323855143458670230536670073483e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.53142592196246595846485130434777396548e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81719621726393542967303806360105998384e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00188544550531824809437206713326495544e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62706943144847786115732327787879709587e32), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32129438774563059735783287456769609571e31), + }; + BOOST_MATH_STATIC const RealType Q[32] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.65910866673514847742559406762379054364e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.21954860438789969160116317316418373146e5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.85684385746348850219351196129081986508e7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.76131116920014625994371306210585646224e9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.57402411617965582839975369786525269977e11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.42213951996062253608905591667405322835e13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.55477693883842522631954327528060778834e15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.00397907346473927493255003955380711046e17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76305959503723486331556274939198109922e19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.27926540483498824808520492399128682366e21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.98253913105291675445666919447864520248e22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.63457445658532249936389003141915626894e24), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.51446616633910582673057455450707805902e25), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04744823698010333311911891992022528040e27), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.03400927415310540137351756981742318263e28), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.28761940359662123632247441327784689568e29), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.39016138777648624292953560568071708327e30), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79639567867465767764785448609833337532e31), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.23406781975678544311073661662680006588e32), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.97261483656310352862554580475760827374e33), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.62715040832592600542933595577003951697e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.77359945057399130202830211722221279906e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07842295432910751940058270741081867701e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.51739306780247334064265249344359460675e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.60574331076505049588401700048488577194e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.08286808700840316336961663635580879141e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.27661033115008662284071342245200272702e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00465576791024249023365007797010262700e35), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83311248273885136105510175099322638440e34), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.96635220211386288597285960837372073054e33),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23849744128418288892902205619933047730e32),
+            };
+            // LCOV_EXCL_STOP
+            result = u * tools::evaluate_polynomial(P, u) / (tools::evaluate_polynomial(Q, u) * (p * p));
+        }
+        else if (u < 0.25) {
+            RealType t = u - static_cast<RealType>(0.125);
+
+            // Rational Approximation
+            // Maximum Relative Error: 9.0663e-36
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[13] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.46698650748920243663487731226111319705e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.39021286045890143123252180276484388346e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21933242816562043224009451007344301143e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33741547463966207206741888477702151242e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.29556944160837955334643715180923663741e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.25261081330476435844217173674285740857e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28690563577245995896389783271544510833e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.51764495004238264050843085122188741180e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.00501773552098137637598813101153206656e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.93991776883375928647775429233323885440e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.13059418708769178567954713937745050279e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.41565791250614170744069436181282300453e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.35838723365672196069179944509778281549e1),
+            };
+            BOOST_MATH_STATIC const RealType Q[13] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.63888803456697300467924455320638435538e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.74785179836182339383932806919167693991e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.15133301804008879476562749311747788645e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87361675398393057971764841741518474061e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.02992617475892211368309739891693879676e5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36356854400440662641546588001882412251e5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.35807552915245783626759227539698719908e5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75959389290929178190646034566377062463e5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18514088996371641206828142820042918681e5),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54881978220293930450469794941944831047e4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83958740186543542804045767758191509433e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26637084978098507405883170227585648985e2),
+            };
+            // LCOV_EXCL_STOP
+            result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+        }
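+        // (Editor's note.) The coefficient tables in this header read like
+        // machine-generated minimax rational fits -- the quoted "Maximum
+        // Relative Error" lines being the fit residuals, presumably from a
+        // Remez-style optimisation -- which is why the LCOV_EXCL markers
+        // exclude the generated tables from coverage accounting.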
+        else if (u < 0.5) {
+            RealType t = u - static_cast<RealType>(0.25);
+
+            // Rational Approximation
+            // Maximum Relative Error: 7.1265e-35
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[14] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.69627866689346442965083437425920959525e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.28948812330446670380449765578224539665e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.38832694133021352110245148952631526683e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.70206624753427831733487031852769976576e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.34677850226082773550206949299306677736e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.18657422004942861459539366963056149110e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.90933843076824719761937043667767333536e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.78597771586582252472927601403235921029e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76489020985978559079198751910122765603e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.37662018494780327201390375334403954354e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11303058491765900888068268844399186476e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.38147649159947518976483710606042789880e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.81260575060831053615857196033574207714e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, -1.26783311530618626413866321968979725353e-3),
+            };
+            BOOST_MATH_STATIC const RealType Q[13] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.98941943311823528497840052715295329781e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70142252619301982454969690308614487433e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.17472255695869018956165466705137979540e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.42016169942136311355803413981032780219e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.55874385736597452997483327962434131932e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45782883079400958761816030672202996788e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.05272877129840019671123017296056938361e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.79833037593794381103412381177370862105e3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.67388248713896792948592889733513376054e2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.19952164110429183557842014635391021832e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.43813483967503071358907030110791934870e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.21327682641358836049127780506729428797e-1),
+            };
+            // LCOV_EXCL_STOP
+            result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+        }
+        else {
+            RealType t = u - static_cast<RealType>(0.5);
+
+            // Rational Approximation
+            // Maximum Relative Error: 2.7048e-37
+            // LCOV_EXCL_START
+            BOOST_MATH_STATIC const RealType P[15] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.79518653373241051262822702930040975338e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.62230291299220868262265687829866364204e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.87315544620612697712513318458226575394e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.38993950875334507399211313740958438201e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54257654902026056547861805085572437922e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85673656862223617197701693270067722169e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47842193222521213922734312546590337064e1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.76640627287007744941009407221495229316e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.71884893887802925773271837595143776207e0),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.87432154629995817972739015224205530101e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.44664933176248007092868241686074743562e-1),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.32094739938150047092982705610586287965e-2),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.18537678581395571564129512698022192316e-3),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.99365265557355974918712592061740510276e-4),
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.66467016868206844419002547523627548705e-6),
+            };
+            BOOST_MATH_STATIC const RealType Q[15] = {
+                BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.),
+                BOOST_MATH_BIG_CONSTANT(RealType,
113, 1.01315080955831561204744043759079263546e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.43409077070585581955481063438385546913e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09863540097812452102765922256432103612e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.69971336507400724019217277303598318934e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.71444880426858110981683485927452024652e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15252748520663939799185721687082682973e2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.28399989835264172624148638350889215004e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.73464700365199500083227290575797895127e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.40421770918884020099427978511354197438e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.97023025282119988988976542004620759235e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38774609088015115009880504176630591783e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.52748138528630655371589047000668876440e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13088455793478303045390386135591069087e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.18220605549460262119565543089703387122e-5), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p)); + } + } + else if (ilogb(p) >= -4) { + RealType t = -log2(ldexp(p, 2)); + + // Rational Approximation + // Maximum Relative Error: 3.8969e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.02395484493329839255216366819344305871e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.02703992140456336967688958960484716694e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.38779662796374026809611637926067177436e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.22326903547451397450399124548020897393e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.29321119874906326000117036864856138032e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60045794013093831332658415095234082115e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.75863216252160126657107771372004587438e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.65658718311497180532644775193008407069e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.53225259384404343896446164609240157391e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17876243295156782920260122855798305258e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59647007234516896762020830535717539733e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64519789656979327339865975091579252352e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29566724776730544346201080459027524931e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.40979492647851412567441418477263395917e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78894057948338305679452471174923939381e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.97064244496171921075006182915678263370e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13496588267213644899739513941375650458e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.75224691413667093006312591320754720811e-14), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.59000688626663121310675150262772434285e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37518060227321498297232252379976917550e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.96559443266702775026538144474892076437e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.81865210018244220041408788510705356696e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
1.15188291931842064325756652570456168425e1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.16909307081950035111952362482113369939e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.68454509269150307761046136063890222011e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.06391236761753712424925832120306727169e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.35744804731044427608283991933125506859e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00655928177646208520006978937806043639e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04093230988242553633939757013466501271e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.07921031269974885975846184199640060403e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.61356596082773699708092475561216104426e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.83968520269928804453766899533464507543e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.44620973323561344735660659502096499899e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.84398925760354259350870730551452956164e-11), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p)); + } + else if (ilogb(p) >= -8) { + RealType t = -log2(ldexp(p, 4)); + + // Rational Approximation + // Maximum Relative Error: 4.0176e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.39293493266195566603513288406748830312e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.75724665658983779947977436518056682748e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.42437549740894393207094008058345312893e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.26189619865771499663660627120168211026e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.38952430871711360962228087792821341859e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.09604487371653920602809626594722822237e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06215021409396534038209460967790566899e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.02245531075243838209245241246011523536e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.52822482024384335373072062232322682354e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.32527687997718638700761890588399465467e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.54799997015944073019842889902521208940e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59368565314052950335981455903474908073e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.09459594346367728583560281313278117879e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.30296867679720593932307487485758431355e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04594079707862644415224596859620253913e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40274507498190913768918372242285652373e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.48644117815971872777609922455371868747e-16), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88079839671202113888025645668230104601e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58898753182105924446845274197682915131e0), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.05418719178760837974322764299800701708e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.76439568495464423890950166804368135632e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.87661284201828717694596419805804620767e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29462021166220769918154388930589492957e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.89960014717788045459266868996575581278e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
2.21759236630028632465777310665839652757e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.08884467282860764261728614542418632608e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60098870889198704716300891829788260654e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.00120123451682223443624210304146589040e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.08868117923724451329261971335574401646e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.07346130275947166224129347124306950150e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.57848230665832873347797099944091265220e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.50841502849442327828534131901583916707e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.19165038770000448560339443014882434202e-15), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p)); + } + else if (ilogb(p) >= -16) { + RealType t = -log2(ldexp(p, 8)); + + // Rational Approximation + // Maximum Relative Error: 4.1682e-36 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.57911660613037766795694241662819364797e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.28799302413396670477035614399187456630e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.87488304496324715063356722168914018093e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.15106082041721012436439208357739139578e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91744691940169259573871742836817806248e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.40707390548486625606656777332664791183e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.37047148097688601398129659532643297674e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88039545021930711122085375901243257574e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22254460725736448552173288004145978774e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.58462349007293730244197837509157696852e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.95242372547984999431208546685672497090e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10113734998651793201123616276573169622e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.38963677413425618019569452771868834246e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.45242599273032563942546507899265865936e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.64855118157117311049698715635863670233e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.31679318790012894619592273346600264199e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.97289727214495789126072009268721022605e-20), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.10184661848812835285809771940181522329e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.06179300560230499194426573196970342618e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.23171547302923911058112454487643162794e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.20486436116678834807354529081908850425e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.51239574861351183874145649960640500707e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48939385253081273966380467344920741615e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18148716720470800170115047757600735127e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.68156131480770927662478944117713742978e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.13720275846166334505537351224097058812e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.85505701632948614345319635028225905820e-9), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.91876669388212587242659571229471930880e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.12971661051277278610784329698988278013e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.31096179726750865531615367639563072055e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.03579138802970748888093188937926461893e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.45570688568663643410924100311054014175e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.23959804461200982866930072222355142173e-19), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p)); + } + else if (ilogb(p) >= -32) { + RealType t = -log2(ldexp(p, 16)); + + // Rational Approximation + // Maximum Relative Error: 6.2158e-37 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59150086070234561732507586188017224084e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.80849532387385837583114307010320459997e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.41158479406270598752210344238285334672e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88824037165656723581890282427897772492e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.82912940787568736176030025420621547622e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.36469458704261637785603215754389736108e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.13801486421774537025334682673091205328e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.97058432407176502984043208925327069250e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.60823277541385163663463406307766614689e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.45016369260792040947272022706860047646e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.54846457278644736871929319230398689553e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.67291749890916930953794688556299297735e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.18742803398417392282841454979723852423e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.13337431668170547244474715030235433597e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.37782648734897338547414800391203459036e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.17863064141234633971470839644872485483e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.58768205048500915346781559321978174829e-24), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.27782279546086824129750042200649907991e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.86934772625607907724733810228981095894e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.18640923531164938140838239032346416143e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14927915778694317602192656254187608215e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.57462121236985574785071163024761935943e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 5.11326475640883361512750692176665937785e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.49479333407835032831192117009600622344e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.01048164044583907219965175201239136609e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.42444147056333448589159611785359705792e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.72928365136507710372724683325279209464e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.30776323450676114149657959931149982200e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.51599272091669693373558919762006698549e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
7.12120236145539526122748260176385899774e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.65713807525694136400636427188839379484e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.40555520515542383952495965093730818381e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.51084407433793180162386990118245623958e-23), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p)); + } + else if (ilogb(p) >= -64) { + RealType t = -log2(ldexp(p, 32)); + + // Rational Approximation + // Maximum Relative Error: 9.8515e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59154943017783040087729009335921759322e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.35955784629344586058432079844665517425e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.24333525582177610783141409282489279582e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.58257137499954581519132407255793210808e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47191495695958634792434622715063010854e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06408464185207904662485396901099847317e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.20796977470988464880970001894205834196e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.99451680244976178843047944033382023574e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.21331607817814211329055723244764031561e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.16997758215752306644496702331954449485e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.22151810180865778439184946086488092970e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.05017329554372903197056366604190738772e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.45919279055502465343977575104142733356e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.14611865933281087898817644094411667861e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.66574579315129285098834562564888533591e-19), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.90098275536617376789480602467351545227e-21), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.56200324658873566425094389271790730206e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.35526648761411463124801128103381691418e-26), + }; + BOOST_MATH_STATIC const RealType Q[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.99582804063194774835771688139366152937e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.81210581320456331960539046132284190053e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.94358920922810097599951120081974275145e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.24831443209858422294319043037419780210e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.68584098673893150929178892200446909375e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.90058244779420124535106788512940199547e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.88151039746201934320258158884886191886e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.62348975553160355852344937226490493460e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.62007418751593938350474825754731470453e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.67502458979132962529935588245058477825e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.91648040348401277706598232576212305626e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.05843052379331618504561151714467880641e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.20127592059771206959014911028588129920e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.04661894930286305556240859086772458465e-18), + 
BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.19442269177165740287568170417649762849e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.81435584873372180820418114652670864136e-23), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.62145023253666168339801687459484937001e-25), + }; + // LCOV_EXCL_STOP + result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p)); + } + else if (ilogb(p) >= -128) { + RealType t = -log2(ldexp(p, 64)); + + // Rational Approximation + // Maximum Relative Error: 2.2157e-35 + // LCOV_EXCL_START + BOOST_MATH_STATIC const RealType P[18] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.59154943091895335751628149866310390641e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.91164927854420277537616294413463565970e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47557801928232619499125670863084398577e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.06172621625221091203249391660455847328e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.11720157411653968975956625234656001375e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.70086412127379161257840749700428137407e-5), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11069186177775505692019195793079552937e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.04581765901792649215653828121992908775e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.78996797234624395264657873201296117159e-9), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.10365978021268853654282661591051834468e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.76744621013787434243259445839624450867e-12), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.11110170303355425599446515240949433934e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.83669090335022069229153919882930282425e-15), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.09633460833089193733622172621696983652e-17), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.16200852052266861122422190933586966917e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.47795810090424252745150042033544310609e-20), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.35722092370326505616747155207965300634e-22), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 8.98381676423023212724768510437325359364e-51), + }; + BOOST_MATH_STATIC const RealType Q[17] = { + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 4.34271731953273239691423485699928257808e-1), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 9.27133013035186849140772481980360839559e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.29542078693828543560388747333519393752e-2), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.33027698228265344561650492600955601983e-3), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.06868444562964057795387778972002636261e-4), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.97868278672593071151212650783507879919e-6), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.79869926850283188785885503049178903204e-7), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.75298857713475428388051708713106549590e-8), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.93449891515741631942400181171061740767e-10), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 2.36715626731277089044713008494971829270e-11), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 6.98125789528264426960496891930942311971e-13), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.78234546049400950544724588355539821858e-14), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.83044000387150792693434128054414524740e-16), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 7.30111486296552039483196431122555524886e-18), + BOOST_MATH_BIG_CONSTANT(RealType, 113, 
9.28628462422858135083238952377446358986e-20),
+            BOOST_MATH_BIG_CONSTANT(RealType, 113, 1.48108558735886480298604270981393793162e-21),
+        };
+        // LCOV_EXCL_STOP
+        result = tools::evaluate_polynomial(P, t) / (tools::evaluate_polynomial(Q, t) * (p * p));
+    }
+    else {
+        result = 1 / (p * p * constants::two_pi<RealType>());
+    }
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 53>& tag)
+{
+    if (p > 0.5) {
+        return !complement ? saspoint5_quantile_upper_imp_prec(1 - p, tag) : -saspoint5_quantile_upper_imp_prec(1 - p, tag);
+    }
+
+    return complement ? saspoint5_quantile_upper_imp_prec(p, tag) : -saspoint5_quantile_upper_imp_prec(p, tag);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_quantile_imp_prec(const RealType& p, bool complement, const boost::math::integral_constant<int, 113>& tag)
+{
+    if (p > 0.5) {
+        return !complement ? saspoint5_quantile_upper_imp_prec(1 - p, tag) : -saspoint5_quantile_upper_imp_prec(1 - p, tag);
+    }
+
+    return complement ? saspoint5_quantile_upper_imp_prec(p, tag) : -saspoint5_quantile_upper_imp_prec(p, tag);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_quantile_imp(const saspoint5_distribution<RealType, Policy>& dist, const RealType& p, bool complement)
+{
+    // This routine implements the quantile for the Saspoint5 distribution,
+    // the value p may be the probability, or its complement if complement=true.
+
+    constexpr auto function = "boost::math::quantile(saspoint5<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+    RealType location = dist.location();
+
+    if (false == detail::check_location(function, location, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+    if (false == detail::check_probability(function, p, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The SaS point5 distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = location + scale * saspoint5_quantile_imp_prec(p, complement, tag_type());
+
+    return result;
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_entropy_imp_prec(const boost::math::integral_constant<int, 53>&)
+{
+    return static_cast<RealType>(3.63992444568030649573);
+}
+
+template <typename RealType>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_entropy_imp_prec(const boost::math::integral_constant<int, 113>&)
+{
+    return BOOST_MATH_BIG_CONSTANT(RealType, 113, 3.6399244456803064957308496039071853510);
+}
+
+template <typename RealType, typename Policy>
+BOOST_MATH_GPU_ENABLED inline RealType saspoint5_entropy_imp(const saspoint5_distribution<RealType, Policy>& dist)
+{
+    // This implements the entropy for the Saspoint5 distribution,
+
+    constexpr auto function = "boost::math::entropy(saspoint5<%1%>&, %1%)";
+    BOOST_MATH_STD_USING // for ADL of std functions
+
+    RealType result = 0;
+    RealType scale = dist.scale();
+
+    if (false == detail::check_scale(function, scale, &result, Policy()))
+    {
+        return result;
+    }
+
+    typedef typename tools::promote_args<RealType>::type result_type;
+    typedef typename policies::precision<result_type, Policy>::type precision_type;
+    typedef boost::math::integral_constant<int,
+        precision_type::value <= 0 ? 0 :
+        precision_type::value <= 53 ? 53 :
+        precision_type::value <= 113 ? 113 : 0
+    > tag_type;
+
+    static_assert(tag_type::value, "The SaS point5 distribution is only implemented for types with known precision, and 113 bits or fewer in the mantissa (ie 128 bit quad-floats)");
+
+    result = saspoint5_entropy_imp_prec<RealType>(tag_type()) + log(scale);
+
+    return result;
+}
+
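+// (Editor's illustrative note, not part of the patch.) Since the entropy
+// implementation above is a fixed constant plus log(scale), the location
+// parameter never matters:
+//
+//     boost::math::saspoint5<> d(5.0, 2.0);   // location 5, scale 2
+//     double h = boost::math::entropy(d);     // == 3.63992444568... + log(2.0)
+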
+} // detail
+
+template <class RealType = double, class Policy = policies::policy<> >
+class saspoint5_distribution
+{
+    public:
+    typedef RealType value_type;
+    typedef Policy policy_type;
+
+    BOOST_MATH_GPU_ENABLED saspoint5_distribution(RealType l_location = 0, RealType l_scale = 1)
+        : mu(l_location), c(l_scale)
+    {
+        constexpr auto function = "boost::math::saspoint5_distribution<%1%>::saspoint5_distribution";
+        RealType result = 0;
+        detail::check_location(function, l_location, &result, Policy());
+        detail::check_scale(function, l_scale, &result, Policy());
+    } // saspoint5_distribution
+
+    BOOST_MATH_GPU_ENABLED RealType location()const
+    {
+        return mu;
+    }
+    BOOST_MATH_GPU_ENABLED RealType scale()const
+    {
+        return c;
+    }
+
+    private:
+    RealType mu; // The location parameter.
+    RealType c;  // The scale parameter.
+};
+
+typedef saspoint5_distribution<double> saspoint5;
+
+#ifdef __cpp_deduction_guides
+template <class RealType>
+saspoint5_distribution(RealType) -> saspoint5_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+template <class RealType>
+saspoint5_distribution(RealType, RealType) -> saspoint5_distribution<typename boost::math::tools::promote_args<RealType>::type>;
+#endif
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const saspoint5_distribution<RealType, Policy>&)
+{ // Range of permissible values for random variable x.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const saspoint5_distribution<RealType, Policy>&)
+{ // Range of supported values for random variable x.
+    // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
+    BOOST_MATH_IF_CONSTEXPR (boost::math::numeric_limits<RealType>::has_infinity)
+    {
+        return boost::math::pair<RealType, RealType>(-boost::math::numeric_limits<RealType>::infinity(), boost::math::numeric_limits<RealType>::infinity()); // - to + infinity.
+    }
+    else
+    { // Can only use max_value.
+        using boost::math::tools::max_value;
+        return boost::math::pair<RealType, RealType>(-tools::max_value<RealType>(), max_value<RealType>()); // - to + max.
+    }
+}
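+// (Editor's note.) As with the Cauchy distribution, the support is the whole
+// real line, so for types with an infinity both bounds are infinite:
+//
+//     auto s = boost::math::support(boost::math::saspoint5<>());
+//     // s.first == -infinity, s.second == +infinity
+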
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const saspoint5_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::saspoint5_pdf_imp(dist, x);
+} // pdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const saspoint5_distribution<RealType, Policy>& dist, const RealType& x)
+{
+    return detail::saspoint5_cdf_imp(dist, x, false);
+} // cdf
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const saspoint5_distribution<RealType, Policy>& dist, const RealType& p)
+{
+    return detail::saspoint5_quantile_imp(dist, p, false);
+} // quantile
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<saspoint5_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::saspoint5_cdf_imp(c.dist, c.param, true);
+} // cdf complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<saspoint5_distribution<RealType, Policy>, RealType>& c)
+{
+    return detail::saspoint5_quantile_imp(c.dist, c.param, true);
+} // quantile complement
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mean(const saspoint5_distribution<RealType, Policy> &dist)
+{
+    // There is no mean:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The SaS point5 Distribution has no mean");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::mean(saspoint5<%1%>&)",
+        "The SaS point5 distribution does not have a mean: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType variance(const saspoint5_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no variance:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The SaS point5 Distribution has no variance");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::variance(saspoint5<%1%>&)",
+        "The SaS point5 distribution does not have a variance: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType mode(const saspoint5_distribution<RealType, Policy>& dist)
+{
+    return dist.location();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType median(const saspoint5_distribution<RealType, Policy>& dist)
+{
+    return dist.location();
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const saspoint5_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no skewness:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The SaS point5 Distribution has no skewness");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::skewness(saspoint5<%1%>&)",
+        "The SaS point5 distribution does not have a skewness: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy()); // infinity?
+}
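+// (Editor's sketch, standard Boost.Math policy machinery; not part of the
+// patch.) With alpha = 1/2 < 1 the tails are too heavy for any integer moment
+// to exist, hence the static_asserts in mean()/variance()/skewness() above
+// and kurtosis() below. Callers who prefer a runtime signal can opt out of
+// the compile-time assert:
+//
+//     using quiet_policy = boost::math::policies::policy<
+//         boost::math::policies::assert_undefined<false>,
+//         boost::math::policies::domain_error<boost::math::policies::ignore_error>>;
+//     boost::math::saspoint5_distribution<double, quiet_policy> d(0.0, 1.0);
+//     double m = boost::math::mean(d); // compiles; yields a quiet NaN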
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const saspoint5_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no kurtosis:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The SaS point5 Distribution has no kurtosis");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::kurtosis(saspoint5<%1%>&)",
+        "The SaS point5 distribution does not have a kurtosis: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const saspoint5_distribution<RealType, Policy>& /*dist*/)
+{
+    // There is no kurtosis excess:
+    typedef typename Policy::assert_undefined_type assert_type;
+    static_assert(assert_type::value == 0, "The SaS point5 Distribution has no kurtosis excess");
+
+    return policies::raise_domain_error<RealType>(
+        "boost::math::kurtosis_excess(saspoint5<%1%>&)",
+        "The SaS point5 distribution does not have a kurtosis: "
+        "the only possible return value is %1%.",
+        boost::math::numeric_limits<RealType>::quiet_NaN(), Policy());
+}
+
+template <class RealType, class Policy>
+BOOST_MATH_GPU_ENABLED inline RealType entropy(const saspoint5_distribution<RealType, Policy>& dist)
+{
+    return detail::saspoint5_entropy_imp(dist);
+}
+
+}} // namespaces
+
+
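+// (Editor's aside, not part of the patch.) The point of marking everything
+// above BOOST_MATH_GPU_ENABLED is that the distribution becomes callable from
+// device code as well as host code. A hypothetical CUDA kernel -- all names
+// here are illustrative only:
+//
+//     __global__ void sas_cdf(const double* in, double* out, int n)
+//     {
+//         int i = blockIdx.x * blockDim.x + threadIdx.x;
+//         boost::math::saspoint5<> d(0.0, 1.0); // location 0, scale 1
+//         if (i < n) { out[i] = cdf(d, in[i]); }
+//     }
+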
+#endif // BOOST_STATS_SASPOINT5_HPP
diff --git a/include/boost/math/distributions/students_t.hpp b/include/boost/math/distributions/students_t.hpp
index b01b8aa0fc..39f20d6e41 100644
--- a/include/boost/math/distributions/students_t.hpp
+++ b/include/boost/math/distributions/students_t.hpp
@@ -1,7 +1,7 @@
 // Copyright John Maddock 2006.
 // Copyright Paul A. Bristow 2006, 2012, 2017.
 // Copyright Thomas Mang 2012.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -12,14 +12,17 @@
 // http://en.wikipedia.org/wiki/Student%27s_t_distribution
 // http://www.itl.nist.gov/div898/handbook/eda/section3/eda3664.htm
 
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/cstdint.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/tools/tuple.hpp>
 #include <boost/math/distributions/fwd.hpp>
 #include <boost/math/special_functions/beta.hpp> // for ibeta(a, b, x).
 #include <boost/math/distributions/complement.hpp>
 #include <boost/math/distributions/detail/common_error_handling.hpp>
 #include <boost/math/distributions/normal.hpp>
 #include <boost/math/special_functions/digamma.hpp>
-
-#include <utility>
+#include <boost/math/policies/error_handling.hpp>
 
 #ifdef _MSC_VER
 # pragma warning(push)
@@ -35,20 +38,20 @@ class students_t_distribution
    typedef RealType value_type;
    typedef Policy policy_type;
 
-   students_t_distribution(RealType df) : df_(df)
+   BOOST_MATH_GPU_ENABLED students_t_distribution(RealType df) : df_(df)
    { // Constructor.
       RealType result;
       detail::check_df_gt0_to_inf( // Checks that df > 0 or df == inf.
         "boost::math::students_t_distribution<%1%>::students_t_distribution", df_, &result, Policy());
    } // students_t_distribution
 
-   RealType degrees_of_freedom()const
+   BOOST_MATH_GPU_ENABLED RealType degrees_of_freedom()const
    {
       return df_;
    }
 
    // Parameter estimation:
-   static RealType find_degrees_of_freedom(
+   BOOST_MATH_GPU_ENABLED static RealType find_degrees_of_freedom(
       RealType difference_from_mean,
       RealType alpha,
       RealType beta,
@@ -68,26 +71,26 @@ students_t_distribution(RealType)->students_t_distribution
 
 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> range(const students_t_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> range(const students_t_distribution<RealType, Policy>& /*dist*/)
 { // Range of permissible values for random variable x.
   // Now including infinity.
    using boost::math::tools::max_value;
-   //return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>());
-   return std::pair<RealType, RealType>(((::std::numeric_limits<RealType>::is_specialized & ::std::numeric_limits<RealType>::has_infinity) ? -std::numeric_limits<RealType>::infinity() : -max_value<RealType>()), ((::std::numeric_limits<RealType>::is_specialized & ::std::numeric_limits<RealType>::has_infinity) ? +std::numeric_limits<RealType>::infinity() : +max_value<RealType>()));
+   //return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(((::boost::math::numeric_limits<RealType>::is_specialized & ::boost::math::numeric_limits<RealType>::has_infinity) ? -boost::math::numeric_limits<RealType>::infinity() : -max_value<RealType>()), ((::boost::math::numeric_limits<RealType>::is_specialized & ::boost::math::numeric_limits<RealType>::has_infinity) ? +boost::math::numeric_limits<RealType>::infinity() : +max_value<RealType>()));
 }
 
 template <class RealType, class Policy>
-inline const std::pair<RealType, RealType> support(const students_t_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline const boost::math::pair<RealType, RealType> support(const students_t_distribution<RealType, Policy>& /*dist*/)
 { // Range of supported values for random variable x.
   // Now including infinity.
   // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero.
   using boost::math::tools::max_value;
-   //return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>());
-   return std::pair<RealType, RealType>(((::std::numeric_limits<RealType>::is_specialized & ::std::numeric_limits<RealType>::has_infinity) ? -std::numeric_limits<RealType>::infinity() : -max_value<RealType>()), ((::std::numeric_limits<RealType>::is_specialized & ::std::numeric_limits<RealType>::has_infinity) ? +std::numeric_limits<RealType>::infinity() : +max_value<RealType>()));
+   //return boost::math::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>());
+   return boost::math::pair<RealType, RealType>(((::boost::math::numeric_limits<RealType>::is_specialized & ::boost::math::numeric_limits<RealType>::has_infinity) ? -boost::math::numeric_limits<RealType>::infinity() : -max_value<RealType>()), ((::boost::math::numeric_limits<RealType>::is_specialized & ::boost::math::numeric_limits<RealType>::has_infinity) ? +boost::math::numeric_limits<RealType>::infinity() : +max_value<RealType>()));
 }
 
 template <class RealType, class Policy>
-inline RealType pdf(const students_t_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType pdf(const students_t_distribution<RealType, Policy>& dist, const RealType& x)
 {
    BOOST_FPU_EXCEPTION_GUARD
    BOOST_MATH_STD_USING  // for ADL of std functions.
@@ -135,7 +138,7 @@ inline RealType pdf(const students_t_distribution<RealType, Policy>& dist, const
 } // pdf
 
 template <class RealType, class Policy>
-inline RealType cdf(const students_t_distribution<RealType, Policy>& dist, const RealType& x)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const students_t_distribution<RealType, Policy>& dist, const RealType& x)
 {
    RealType error_result;
    // degrees_of_freedom > 0 or infinity check:
@@ -209,7 +212,7 @@ inline RealType cdf(const students_t_distribution<RealType, Policy>& dist, const
 } // cdf
 
 template <class RealType, class Policy>
-inline RealType quantile(const students_t_distribution<RealType, Policy>& dist, const RealType& p)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const students_t_distribution<RealType, Policy>& dist, const RealType& p)
 {
    BOOST_MATH_STD_USING // for ADL of std functions
    //
@@ -218,7 +221,7 @@ inline RealType quantile(const students_t_distribution<RealType, Policy>& dist,
    // Check for domain errors:
    RealType df = dist.degrees_of_freedom();
-   static const char* function = "boost::math::quantile(const students_t_distribution<%1%>&, %1%)";
+   constexpr auto function = "boost::math::quantile(const students_t_distribution<%1%>&, %1%)";
    RealType error_result;
    if(false == (detail::check_df_gt0_to_inf( // Check that df > 0 or == +infinity.
      function, df, &error_result, Policy())
@@ -263,13 +266,13 @@ inline RealType quantile(const students_t_distribution<RealType, Policy>& dist,
 } // quantile
 
 template <class RealType, class Policy>
-inline RealType cdf(const complemented2_type<students_t_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type<students_t_distribution<RealType, Policy>, RealType>& c)
 {
    return cdf(c.dist, -c.param);
 }
 
 template <class RealType, class Policy>
-inline RealType quantile(const complemented2_type<students_t_distribution<RealType, Policy>, RealType>& c)
+BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<students_t_distribution<RealType, Policy>, RealType>& c)
 {
    return -quantile(c.dist, c.param);
 }
@@ -284,10 +287,10 @@ namespace detail{
 template <class RealType, class Policy>
 struct sample_size_func
 {
-   sample_size_func(RealType a, RealType b, RealType s, RealType d)
+   BOOST_MATH_GPU_ENABLED sample_size_func(RealType a, RealType b, RealType s, RealType d)
       : alpha(a), beta(b), ratio(s*s/(d*d)) {}
 
-   RealType operator()(const RealType& df)
+   BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& df)
    {
       if(df <= tools::min_value<RealType>())
       { //
@@ -308,14 +311,14 @@ struct sample_size_func
 } // namespace detail
 
 template <class RealType, class Policy>
-RealType students_t_distribution<RealType, Policy>::find_degrees_of_freedom(
+BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::find_degrees_of_freedom(
       RealType difference_from_mean,
       RealType alpha,
       RealType beta,
      RealType sd,
      RealType hint)
 {
-   static const char* function = "boost::math::students_t_distribution<%1%>::find_degrees_of_freedom";
+   constexpr auto function = "boost::math::students_t_distribution<%1%>::find_degrees_of_freedom";
    //
    // Check for domain errors:
    //
@@ -330,8 +333,8 @@ RealType students_t_distribution<RealType, Policy>::find_degrees_of_freedom(
 
    detail::sample_size_func<RealType, Policy> f(alpha, beta, sd, difference_from_mean);
    tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>());
-   std::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
-   std::pair<RealType, RealType> r = tools::bracket_and_solve_root(f, hint, RealType(2), false, tol, max_iter, Policy());
+   boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>();
+   boost::math::pair<RealType, RealType> r = tools::bracket_and_solve_root(f, hint, RealType(2), false, tol, max_iter, Policy());
    RealType result = r.first + (r.second - r.first) / 2;
    if(max_iter >= policies::get_max_root_iterations<Policy>())
    {
@@ -342,14 +345,14 @@ RealType students_t_distribution<RealType, Policy>::find_degrees_of_freedom(
 }
 
 template <class RealType, class Policy>
-inline RealType mode(const students_t_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType mode(const students_t_distribution<RealType, Policy>& /*dist*/)
 {
   // Assume no checks on degrees of freedom are useful (unlike mean).
    return 0; // Always zero by definition.
 }
 
 template <class RealType, class Policy>
-inline RealType median(const students_t_distribution<RealType, Policy>& /*dist*/)
+BOOST_MATH_GPU_ENABLED inline RealType median(const students_t_distribution<RealType, Policy>& /*dist*/)
 {
   // Assume no checks on degrees of freedom are useful (unlike mean).
    return 0; // Always zero by definition.
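+// (Editor's sketch, not part of the patch.) find_degrees_of_freedom above
+// answers "how many degrees of freedom make a t-test this sensitive?"; the
+// numbers below are illustrative only:
+//
+//     double df = boost::math::students_t::find_degrees_of_freedom(
+//         1.3,   // difference from the mean to detect
+//         0.05,  // alpha: acceptable risk of a false positive
+//         0.1,   // beta: acceptable risk of a false negative
+//         1.3,   // sd: the sample standard deviation
+//         100);  // hint: starting point for the iteration
+//     // then round up to an integral sample size, e.g. std::ceil(df) + 1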
@@ -358,7 +361,7 @@ inline RealType median(const students_t_distribution<RealType, Policy>& /*dist*/
 
 // See section 5.1 on moments at http://en.wikipedia.org/wiki/Student%27s_t-distribution
 template <class RealType, class Policy>
-inline RealType mean(const students_t_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType mean(const students_t_distribution<RealType, Policy>& dist)
 {  // Revised for https://svn.boost.org/trac/boost/ticket/7177
    RealType df = dist.degrees_of_freedom();
    if(((boost::math::isnan)(df)) || (df <= 1) )
@@ -366,13 +369,13 @@ inline RealType mean(const students_t_distribution<RealType, Policy>& dist)
       return policies::raise_domain_error<RealType>(
       "boost::math::mean(students_t_distribution<%1%> const&, %1%)",
       "Mean is undefined for degrees of freedom < 1 but got %1%.", df, Policy());
-      return std::numeric_limits<RealType>::quiet_NaN();
+      return boost::math::numeric_limits<RealType>::quiet_NaN();
    }
    return 0;
 } // mean
 
 template <class RealType, class Policy>
-inline RealType variance(const students_t_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType variance(const students_t_distribution<RealType, Policy>& dist)
 { // http://en.wikipedia.org/wiki/Student%27s_t-distribution
   // Revised for https://svn.boost.org/trac/boost/ticket/7177
    RealType df = dist.degrees_of_freedom();
@@ -382,7 +385,7 @@ inline RealType variance(const students_t_distribution<RealType, Policy>& dist)
       "boost::math::variance(students_t_distribution<%1%> const&, %1%)",
       "variance is undefined for degrees of freedom <= 2, but got %1%.",
       df, Policy());
-      return std::numeric_limits<RealType>::quiet_NaN(); // Undefined.
+      return boost::math::numeric_limits<RealType>::quiet_NaN(); // Undefined.
    }
    if ((boost::math::isinf)(df))
    { // +infinity.
@@ -404,7 +407,7 @@ inline RealType variance(const students_t_distribution<RealType, Policy>& dist)
 } // variance
 
 template <class RealType, class Policy>
-inline RealType skewness(const students_t_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType skewness(const students_t_distribution<RealType, Policy>& dist)
 {
    RealType df = dist.degrees_of_freedom();
    if( ((boost::math::isnan)(df)) || (dist.degrees_of_freedom() <= 3))
@@ -413,13 +416,13 @@ inline RealType skewness(const students_t_distribution<RealType, Policy>& dist)
       "boost::math::skewness(students_t_distribution<%1%> const&, %1%)",
       "Skewness is undefined for degrees of freedom <= 3, but got %1%.",
       dist.degrees_of_freedom(), Policy());
-      return std::numeric_limits<RealType>::quiet_NaN();
+      return boost::math::numeric_limits<RealType>::quiet_NaN();
    }
    return 0; // For all valid df, including infinity.
 } // skewness
 
 template <class RealType, class Policy>
-inline RealType kurtosis(const students_t_distribution<RealType, Policy>& dist)
+BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const students_t_distribution<RealType, Policy>& dist)
 {
    RealType df = dist.degrees_of_freedom();
    if(((boost::math::isnan)(df)) || (df <= 4))
@@ -428,7 +431,7 @@ inline RealType kurtosis(const students_t_distribution<RealType, Policy>& dist)
       "boost::math::kurtosis(students_t_distribution<%1%> const&, %1%)",
       "Kurtosis is undefined for degrees of freedom <= 4, but got %1%.",
       df, Policy());
-      return std::numeric_limits<RealType>::quiet_NaN(); // Undefined.
+      return boost::math::numeric_limits<RealType>::quiet_NaN(); // Undefined.
    }
   if ((boost::math::isinf)(df))
   { // +infinity.
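+     // (Editor's note.) In the df -> infinity limit Student's t degenerates
+     // to the normal distribution, so the lines elided from this hunk
+     // presumably return the normal kurtosis of 3 here.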
@@ -451,7 +454,7 @@ inline RealType kurtosis(const students_t_distribution& dist) } // kurtosis template -inline RealType kurtosis_excess(const students_t_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const students_t_distribution& dist) { // see http://mathworld.wolfram.com/Kurtosis.html @@ -462,7 +465,7 @@ inline RealType kurtosis_excess(const students_t_distribution& "boost::math::kurtosis_excess(students_t_distribution<%1%> const&, %1%)", "Kurtosis_excess is undefined for degrees of freedom <= 4, but got %1%.", df, Policy()); - return std::numeric_limits::quiet_NaN(); // Undefined. + return boost::math::numeric_limits::quiet_NaN(); // Undefined. } if ((boost::math::isinf)(df)) { // +infinity. @@ -484,10 +487,9 @@ inline RealType kurtosis_excess(const students_t_distribution& } template -inline RealType entropy(const students_t_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType entropy(const students_t_distribution& dist) { - using std::log; - using std::sqrt; + BOOST_MATH_STD_USING RealType v = dist.degrees_of_freedom(); RealType vp1 = (v+1)/2; RealType vd2 = v/2; diff --git a/include/boost/math/distributions/triangular.hpp b/include/boost/math/distributions/triangular.hpp index 950d78147f..b333ddbc31 100644 --- a/include/boost/math/distributions/triangular.hpp +++ b/include/boost/math/distributions/triangular.hpp @@ -16,20 +16,20 @@ // http://en.wikipedia.org/wiki/Triangular_distribution +#include +#include #include #include #include #include #include -#include - namespace boost{ namespace math { namespace detail { template - inline bool check_triangular_lower( + BOOST_MATH_GPU_ENABLED inline bool check_triangular_lower( const char* function, RealType lower, RealType* result, const Policy& pol) @@ -48,7 +48,7 @@ namespace boost{ namespace math } // bool check_triangular_lower( template - inline bool check_triangular_mode( + BOOST_MATH_GPU_ENABLED inline bool check_triangular_mode( const char* function, RealType mode, RealType* result, const Policy& pol) @@ -67,7 +67,7 @@ namespace boost{ namespace math } // bool check_triangular_mode( template - inline bool check_triangular_upper( + BOOST_MATH_GPU_ENABLED inline bool check_triangular_upper( const char* function, RealType upper, RealType* result, const Policy& pol) @@ -86,7 +86,7 @@ namespace boost{ namespace math } // bool check_triangular_upper( template - inline bool check_triangular_x( + BOOST_MATH_GPU_ENABLED inline bool check_triangular_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -105,7 +105,7 @@ namespace boost{ namespace math } // bool check_triangular_x template - inline bool check_triangular( + BOOST_MATH_GPU_ENABLED inline bool check_triangular( const char* function, RealType lower, RealType mode, @@ -153,7 +153,7 @@ namespace boost{ namespace math typedef RealType value_type; typedef Policy policy_type; - triangular_distribution(RealType l_lower = -1, RealType l_mode = 0, RealType l_upper = 1) + BOOST_MATH_GPU_ENABLED triangular_distribution(RealType l_lower = -1, RealType l_mode = 0, RealType l_upper = 1) : m_lower(l_lower), m_mode(l_mode), m_upper(l_upper) // Constructor. { // Evans says 'standard triangular' is lower 0, mode 1/2, upper 1, // has median sqrt(c/2) for c <=1/2 and 1 - sqrt(1-c)/2 for c >= 1/2 @@ -163,15 +163,15 @@ namespace boost{ namespace math detail::check_triangular("boost::math::triangular_distribution<%1%>::triangular_distribution",l_lower, l_mode, l_upper, &result, Policy()); } // Accessor functions. 
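For reference, the quantity the entropy hunk above evaluates (with v the degrees of freedom, psi the digamma function and B the beta function) is the standard Student's t entropy; the vp1 and vd2 locals are the two half-arguments:

```latex
H(v) \;=\; \frac{v+1}{2}\left[\psi\!\left(\frac{v+1}{2}\right)-\psi\!\left(\frac{v}{2}\right)\right]
\;+\; \ln\!\left(\sqrt{v}\;B\!\left(\frac{v}{2},\frac{1}{2}\right)\right).
```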
- RealType lower()const + BOOST_MATH_GPU_ENABLED RealType lower()const { return m_lower; } - RealType mode()const + BOOST_MATH_GPU_ENABLED RealType mode()const { return m_mode; } - RealType upper()const + BOOST_MATH_GPU_ENABLED RealType upper()const { return m_upper; } @@ -194,23 +194,23 @@ namespace boost{ namespace math #endif template - inline const std::pair range(const triangular_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const triangular_distribution& /* dist */) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(-max_value(), max_value()); + return boost::math::pair(-max_value(), max_value()); } template - inline const std::pair support(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const triangular_distribution& dist) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. - return std::pair(dist.lower(), dist.upper()); + return boost::math::pair(dist.lower(), dist.upper()); } template - RealType pdf(const triangular_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED RealType pdf(const triangular_distribution& dist, const RealType& x) { - static const char* function = "boost::math::pdf(const triangular_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::pdf(const triangular_distribution<%1%>&, %1%)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -246,9 +246,9 @@ namespace boost{ namespace math } // RealType pdf(const triangular_distribution& dist, const RealType& x) template - inline RealType cdf(const triangular_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const triangular_distribution& dist, const RealType& x) { - static const char* function = "boost::math::cdf(const triangular_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const triangular_distribution<%1%>&, %1%)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -281,10 +281,10 @@ namespace boost{ namespace math } // RealType cdf(const triangular_distribution& dist, const RealType& x) template - RealType quantile(const triangular_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED RealType quantile(const triangular_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions (sqrt). 
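The pdf hunk above implements the standard triangular density; with lower limit a, mode c and upper limit b it is:

```latex
f(x) =
\begin{cases}
0, & x < a \ \text{or}\ x > b,\\[2pt]
\dfrac{2(x-a)}{(b-a)(c-a)}, & a \le x \le c,\\[6pt]
\dfrac{2(b-x)}{(b-a)(b-c)}, & c < x \le b,
\end{cases}
```

which peaks at f(c) = 2/(b - a); the degenerate cases c = a and c = b are handled by the branch checks in the code.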
- static const char* function = "boost::math::quantile(const triangular_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const triangular_distribution<%1%>&, %1%)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -324,9 +324,9 @@ namespace boost{ namespace math } // RealType quantile(const triangular_distribution& dist, const RealType& q) template - RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED RealType cdf(const complemented2_type, RealType>& c) { - static const char* function = "boost::math::cdf(const triangular_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::cdf(const triangular_distribution<%1%>&, %1%)"; RealType lower = c.dist.lower(); RealType mode = c.dist.mode(); RealType upper = c.dist.upper(); @@ -359,10 +359,10 @@ namespace boost{ namespace math } // RealType cdf(const complemented2_type, RealType>& c) template - RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // Aid ADL for sqrt. - static const char* function = "boost::math::quantile(const triangular_distribution<%1%>&, %1%)"; + constexpr auto function = "boost::math::quantile(const triangular_distribution<%1%>&, %1%)"; RealType l = c.dist.lower(); RealType m = c.dist.mode(); RealType u = c.dist.upper(); @@ -408,9 +408,9 @@ namespace boost{ namespace math } // RealType quantile(const complemented2_type, RealType>& c) template - inline RealType mean(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const triangular_distribution& dist) { - static const char* function = "boost::math::mean(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::mean(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -424,9 +424,9 @@ namespace boost{ namespace math template - inline RealType variance(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const triangular_distribution& dist) { - static const char* function = "boost::math::mean(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::mean(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType mode = dist.mode(); RealType upper = dist.upper(); @@ -439,9 +439,9 @@ namespace boost{ namespace math } // RealType variance(const triangular_distribution& dist) template - inline RealType mode(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const triangular_distribution& dist) { - static const char* function = "boost::math::mode(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::mode(const triangular_distribution<%1%>&)"; RealType mode = dist.mode(); RealType result = 0; // of checks. if(false == detail::check_triangular_mode(function, mode, &result, Policy())) @@ -452,10 +452,10 @@ namespace boost{ namespace math } // RealType mode template - inline RealType median(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType median(const triangular_distribution& dist) { BOOST_MATH_STD_USING // ADL of std functions. - static const char* function = "boost::math::median(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::median(const triangular_distribution<%1%>&)"; RealType mode = dist.mode(); RealType result = 0; // of checks. 
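The complemented overloads above make quantile(complement(dist, q)) the numerically preferred way to get upper-tail points, since it avoids computing 1 - p explicitly. A host-side sketch using the double typedef:

```cpp
#include <boost/math/distributions/triangular.hpp>
#include <iostream>

int main()
{
    boost::math::triangular tri(0.0, 0.3, 1.0); // lower, mode, upper

    double q = 0.05;
    double x = quantile(complement(tri, q));      // upper 5% point
    std::cout << x << '\n';
    std::cout << cdf(complement(tri, x)) << '\n'; // round trip: ~0.05
}
```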
if(false == detail::check_triangular_mode(function, mode, &result, Policy())) @@ -475,11 +475,11 @@ namespace boost{ namespace math } // RealType mode template - inline RealType skewness(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const triangular_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions using namespace boost::math::constants; // for root_two - static const char* function = "boost::math::skewness(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::skewness(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType mode = dist.mode(); @@ -496,9 +496,9 @@ namespace boost{ namespace math } // RealType skewness(const triangular_distribution& dist) template - inline RealType kurtosis(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const triangular_distribution& dist) { // These checks may be belt and braces as should have been checked on construction? - static const char* function = "boost::math::kurtosis(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::kurtosis(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType upper = dist.upper(); RealType mode = dist.mode(); @@ -511,9 +511,9 @@ namespace boost{ namespace math } // RealType kurtosis_excess(const triangular_distribution& dist) template - inline RealType kurtosis_excess(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const triangular_distribution& dist) { // These checks may be belt and braces as should have been checked on construction? - static const char* function = "boost::math::kurtosis_excess(const triangular_distribution<%1%>&)"; + constexpr auto function = "boost::math::kurtosis_excess(const triangular_distribution<%1%>&)"; RealType lower = dist.lower(); RealType upper = dist.upper(); RealType mode = dist.mode(); @@ -527,9 +527,9 @@ namespace boost{ namespace math } template - inline RealType entropy(const triangular_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType entropy(const triangular_distribution& dist) { - using std::log; + BOOST_MATH_STD_USING return constants::half() + log((dist.upper() - dist.lower())/2); } diff --git a/include/boost/math/distributions/uniform.hpp b/include/boost/math/distributions/uniform.hpp index f57f8cc9f1..328fc61330 100644 --- a/include/boost/math/distributions/uniform.hpp +++ b/include/boost/math/distributions/uniform.hpp @@ -1,5 +1,6 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2006. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -15,18 +16,18 @@ // http://documents.wolfram.com/calculationcenter/v2/Functions/ListsMatrices/Statistics/UniformDistribution.html // http://en.wikipedia.org/wiki/Uniform_distribution_%28continuous%29 +#include +#include #include #include #include -#include - namespace boost{ namespace math { namespace detail { template - inline bool check_uniform_lower( + BOOST_MATH_GPU_ENABLED inline bool check_uniform_lower( const char* function, RealType lower, RealType* result, const Policy& pol) @@ -45,7 +46,7 @@ namespace boost{ namespace math } // bool check_uniform_lower( template - inline bool check_uniform_upper( + BOOST_MATH_GPU_ENABLED inline bool check_uniform_upper( const char* function, RealType upper, RealType* result, const Policy& pol) @@ -64,7 +65,7 @@ namespace boost{ namespace math } // bool check_uniform_upper( template - inline bool check_uniform_x( + BOOST_MATH_GPU_ENABLED inline bool check_uniform_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -83,7 +84,7 @@ namespace boost{ namespace math } // bool check_uniform_x template - inline bool check_uniform( + BOOST_MATH_GPU_ENABLED inline bool check_uniform( const char* function, RealType lower, RealType upper, @@ -116,19 +117,19 @@ namespace boost{ namespace math typedef RealType value_type; typedef Policy policy_type; - uniform_distribution(RealType l_lower = 0, RealType l_upper = 1) // Constructor. + BOOST_MATH_GPU_ENABLED uniform_distribution(RealType l_lower = 0, RealType l_upper = 1) // Constructor. : m_lower(l_lower), m_upper(l_upper) // Default is standard uniform distribution. { RealType result; detail::check_uniform("boost::math::uniform_distribution<%1%>::uniform_distribution", l_lower, l_upper, &result, Policy()); } // Accessor functions. - RealType lower()const + BOOST_MATH_GPU_ENABLED RealType lower()const { return m_lower; } - RealType upper()const + BOOST_MATH_GPU_ENABLED RealType upper()const { return m_upper; } @@ -148,23 +149,23 @@ namespace boost{ namespace math #endif template - inline const std::pair range(const uniform_distribution& /* dist */) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair range(const uniform_distribution& /* dist */) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(-max_value(), max_value()); // - to + 'infinity'. + return boost::math::pair(-max_value(), max_value()); // - to + 'infinity'. // Note RealType infinity is NOT permitted, only max_value. } template - inline const std::pair support(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline const boost::math::pair support(const uniform_distribution& dist) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. 
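The check_uniform helpers above reject non-finite bounds and invalid orderings (e.g. lower above upper) before construction completes; with the default error policy that surfaces as a std::domain_error. A sketch:

```cpp
#include <boost/math/distributions/uniform.hpp>
#include <iostream>
#include <stdexcept>

int main()
{
    try {
        boost::math::uniform bad(1.0, 0.0); // lower > upper: rejected by check_uniform
    } catch (const std::domain_error& e) {
        std::cout << e.what() << '\n';
    }
}
```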
using boost::math::tools::max_value; - return std::pair(dist.lower(), dist.upper()); + return boost::math::pair(dist.lower(), dist.upper()); } template - inline RealType pdf(const uniform_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType pdf(const uniform_distribution& dist, const RealType& x) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -189,7 +190,7 @@ namespace boost{ namespace math } // RealType pdf(const uniform_distribution& dist, const RealType& x) template - inline RealType cdf(const uniform_distribution& dist, const RealType& x) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const uniform_distribution& dist, const RealType& x) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -214,7 +215,7 @@ namespace boost{ namespace math } // RealType cdf(const uniform_distribution& dist, const RealType& x) template - inline RealType quantile(const uniform_distribution& dist, const RealType& p) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const uniform_distribution& dist, const RealType& p) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -239,7 +240,7 @@ namespace boost{ namespace math } // RealType quantile(const uniform_distribution& dist, const RealType& p) template - inline RealType cdf(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { RealType lower = c.dist.lower(); RealType upper = c.dist.upper(); @@ -265,7 +266,7 @@ namespace boost{ namespace math } // RealType cdf(const complemented2_type, RealType>& c) template - inline RealType quantile(const complemented2_type, RealType>& c) + BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { RealType lower = c.dist.lower(); RealType upper = c.dist.upper(); @@ -291,7 +292,7 @@ namespace boost{ namespace math } // RealType quantile(const complemented2_type, RealType>& c) template - inline RealType mean(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mean(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -304,7 +305,7 @@ namespace boost{ namespace math } // RealType mean(const uniform_distribution& dist) template - inline RealType variance(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType variance(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -318,7 +319,7 @@ namespace boost{ namespace math } // RealType variance(const uniform_distribution& dist) template - inline RealType mode(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType mode(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -332,7 +333,7 @@ namespace boost{ namespace math } template - inline RealType median(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType median(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -344,7 +345,7 @@ namespace boost{ namespace math return (lower + upper) / 2; // } template - inline RealType skewness(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType skewness(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -357,7 +358,7 @@ namespace boost{ namespace math } // RealType skewness(const uniform_distribution& dist) template - inline RealType kurtosis_excess(const uniform_distribution& 
dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const uniform_distribution& dist) { RealType lower = dist.lower(); RealType upper = dist.upper(); @@ -370,15 +371,15 @@ namespace boost{ namespace math } // RealType kurtosis_excess(const uniform_distribution& dist) template - inline RealType kurtosis(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const uniform_distribution& dist) { return kurtosis_excess(dist) + 3; } template - inline RealType entropy(const uniform_distribution& dist) + BOOST_MATH_GPU_ENABLED inline RealType entropy(const uniform_distribution& dist) { - using std::log; + BOOST_MATH_STD_USING return log(dist.upper() - dist.lower()); } diff --git a/include/boost/math/distributions/weibull.hpp b/include/boost/math/distributions/weibull.hpp index ca4bbd7b53..eb4de106c8 100644 --- a/include/boost/math/distributions/weibull.hpp +++ b/include/boost/math/distributions/weibull.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2006. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -9,6 +10,10 @@ // http://www.itl.nist.gov/div898/handbook/eda/section3/eda3668.htm // http://mathworld.wolfram.com/WeibullDistribution.html +#include +#include +#include +#include #include #include #include @@ -16,14 +21,12 @@ #include #include -#include - namespace boost{ namespace math { namespace detail{ template -inline bool check_weibull_shape( +BOOST_MATH_GPU_ENABLED inline bool check_weibull_shape( const char* function, RealType shape, RealType* result, const Policy& pol) @@ -39,7 +42,7 @@ inline bool check_weibull_shape( } template -inline bool check_weibull_x( +BOOST_MATH_GPU_ENABLED inline bool check_weibull_x( const char* function, RealType const& x, RealType* result, const Policy& pol) @@ -55,7 +58,7 @@ inline bool check_weibull_x( } template -inline bool check_weibull( +BOOST_MATH_GPU_ENABLED inline bool check_weibull( const char* function, RealType scale, RealType shape, @@ -73,19 +76,19 @@ class weibull_distribution using value_type = RealType; using policy_type = Policy; - explicit weibull_distribution(RealType l_shape, RealType l_scale = 1) + BOOST_MATH_GPU_ENABLED explicit weibull_distribution(RealType l_shape, RealType l_scale = 1) : m_shape(l_shape), m_scale(l_scale) { RealType result; detail::check_weibull("boost::math::weibull_distribution<%1%>::weibull_distribution", l_scale, l_shape, &result, Policy()); } - RealType shape()const + BOOST_MATH_GPU_ENABLED RealType shape()const { return m_shape; } - RealType scale()const + BOOST_MATH_GPU_ENABLED RealType scale()const { return m_scale; } @@ -107,28 +110,28 @@ weibull_distribution(RealType,RealType)->weibull_distribution -inline std::pair range(const weibull_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline boost::math::pair range(const weibull_distribution& /*dist*/) { // Range of permissible values for random variable x. using boost::math::tools::max_value; - return std::pair(static_cast(0), max_value()); + return boost::math::pair(static_cast(0), max_value()); } template -inline std::pair support(const weibull_distribution& /*dist*/) +BOOST_MATH_GPU_ENABLED inline boost::math::pair support(const weibull_distribution& /*dist*/) { // Range of supported values for random variable x. // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. 
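Taken together, the uniform hunks above GPU-annotate the whole interface without changing the mathematics. A quick host-side sanity sketch of the closed forms they compute, before the weibull hunks continue:

```cpp
#include <boost/math/distributions/uniform.hpp>
#include <cmath>
#include <iostream>

int main()
{
    boost::math::uniform u(2.0, 6.0);

    std::cout << pdf(u, 3.0) << '\n';                   // 1/(6-2) = 0.25
    std::cout << quantile(u, cdf(u, 3.0)) << '\n';      // round trip: 3
    std::cout << mean(u) << ' ' << variance(u) << '\n'; // 4 and (6-2)^2/12
    std::cout << kurtosis_excess(u) << '\n';            // -6/5 for every uniform
    std::cout << entropy(u) << ' '
              << std::log(6.0 - 2.0) << '\n';           // both are ln(upper - lower)
}
```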
using boost::math::tools::max_value; using boost::math::tools::min_value; - return std::pair(min_value(), max_value()); + return boost::math::pair(min_value(), max_value()); // A discontinuity at x == 0, so only support down to min_value. } template -inline RealType pdf(const weibull_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType pdf(const weibull_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::pdf(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::pdf(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -158,11 +161,11 @@ inline RealType pdf(const weibull_distribution& dist, const Re } template -inline RealType logpdf(const weibull_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType logpdf(const weibull_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::logpdf(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::logpdf(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -192,11 +195,11 @@ inline RealType logpdf(const weibull_distribution& dist, const } template -inline RealType cdf(const weibull_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const weibull_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::cdf(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -213,11 +216,11 @@ inline RealType cdf(const weibull_distribution& dist, const Re } template -inline RealType logcdf(const weibull_distribution& dist, const RealType& x) +BOOST_MATH_GPU_ENABLED inline RealType logcdf(const weibull_distribution& dist, const RealType& x) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::logcdf(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::logcdf(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -234,11 +237,11 @@ inline RealType logcdf(const weibull_distribution& dist, const } template -inline RealType quantile(const weibull_distribution& dist, const RealType& p) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const weibull_distribution& dist, const RealType& p) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::quantile(const weibull_distribution<%1%>, %1%)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -258,11 +261,11 @@ inline RealType quantile(const weibull_distribution& dist, con } template -inline RealType cdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType cdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::cdf(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::cdf(const weibull_distribution<%1%>, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -279,11 +282,11 @@ inline RealType 
cdf(const complemented2_type -inline RealType logcdf(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType logcdf(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::logcdf(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::logcdf(const weibull_distribution<%1%>, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -300,11 +303,11 @@ inline RealType logcdf(const complemented2_type -inline RealType quantile(const complemented2_type, RealType>& c) +BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type, RealType>& c) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::quantile(const weibull_distribution<%1%>, %1%)"; + constexpr auto function = "boost::math::quantile(const weibull_distribution<%1%>, %1%)"; RealType shape = c.dist.shape(); RealType scale = c.dist.scale(); @@ -325,11 +328,11 @@ inline RealType quantile(const complemented2_type -inline RealType mean(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mean(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::mean(const weibull_distribution<%1%>)"; + constexpr auto function = "boost::math::mean(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -343,12 +346,12 @@ inline RealType mean(const weibull_distribution& dist) } template -inline RealType variance(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType variance(const weibull_distribution& dist) { RealType shape = dist.shape(); RealType scale = dist.scale(); - static const char* function = "boost::math::variance(const weibull_distribution<%1%>)"; + constexpr auto function = "boost::math::variance(const weibull_distribution<%1%>)"; RealType result = 0; if(false == detail::check_weibull(function, scale, shape, &result, Policy())) @@ -363,11 +366,11 @@ inline RealType variance(const weibull_distribution& dist) } template -inline RealType mode(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType mode(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std function pow. - static const char* function = "boost::math::mode(const weibull_distribution<%1%>)"; + constexpr auto function = "boost::math::mode(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -384,11 +387,11 @@ inline RealType mode(const weibull_distribution& dist) } template -inline RealType median(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType median(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std function pow. 
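With k = shape and lambda = scale (the Wikipedia naming the code comments already use), the weibull overloads above compute the standard quantities; the median hunk that follows is just the quantile at p = 1/2, i.e. lambda (ln 2)^(1/k), and the mode formula applies for k > 1 (it is 0 otherwise):

```latex
f(x) = \frac{k}{\lambda}\left(\frac{x}{\lambda}\right)^{k-1} e^{-(x/\lambda)^k},\qquad
F(x) = 1 - e^{-(x/\lambda)^k},\qquad
F^{-1}(p) = \lambda\bigl(-\ln(1-p)\bigr)^{1/k},
```

```latex
\mathbb{E}[X] = \lambda\,\Gamma\!\left(1+\tfrac{1}{k}\right),\qquad
\operatorname{Var}[X] = \lambda^{2}\left[\Gamma\!\left(1+\tfrac{2}{k}\right)-\Gamma^{2}\!\left(1+\tfrac{1}{k}\right)\right],\qquad
\operatorname{mode} = \lambda\left(\tfrac{k-1}{k}\right)^{1/k}\ (k>1).
```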
- static const char* function = "boost::math::median(const weibull_distribution<%1%>)"; + constexpr auto function = "boost::math::median(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); // Wikipedia k RealType scale = dist.scale(); // Wikipedia lambda @@ -404,11 +407,11 @@ inline RealType median(const weibull_distribution& dist) } template -inline RealType skewness(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType skewness(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::skewness(const weibull_distribution<%1%>)"; + constexpr auto function = "boost::math::skewness(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -429,11 +432,11 @@ inline RealType skewness(const weibull_distribution& dist) } template -inline RealType kurtosis_excess(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis_excess(const weibull_distribution& dist) { BOOST_MATH_STD_USING // for ADL of std functions - static const char* function = "boost::math::kurtosis_excess(const weibull_distribution<%1%>)"; + constexpr auto function = "boost::math::kurtosis_excess(const weibull_distribution<%1%>)"; RealType shape = dist.shape(); RealType scale = dist.scale(); @@ -457,15 +460,15 @@ inline RealType kurtosis_excess(const weibull_distribution& di } template -inline RealType kurtosis(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType kurtosis(const weibull_distribution& dist) { return kurtosis_excess(dist) + 3; } template -inline RealType entropy(const weibull_distribution& dist) +BOOST_MATH_GPU_ENABLED inline RealType entropy(const weibull_distribution& dist) { - using std::log; + BOOST_MATH_STD_USING RealType k = dist.shape(); RealType lambda = dist.scale(); return constants::euler()*(1-1/k) + log(lambda/k) + 1; diff --git a/include/boost/math/policies/error_handling.hpp b/include/boost/math/policies/error_handling.hpp index 070266c7fe..0a22dffa7f 100644 --- a/include/boost/math/policies/error_handling.hpp +++ b/include/boost/math/policies/error_handling.hpp @@ -1,6 +1,6 @@ // Copyright John Maddock 2007. // Copyright Paul A. Bristow 2007. - +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -9,6 +9,15 @@ #define BOOST_MATH_POLICY_ERROR_HANDLING_HPP #include +#include +#include +#include +#include +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC + #include #include #include @@ -19,8 +28,6 @@ #include #include #include -#include -#include #ifndef BOOST_MATH_NO_EXCEPTIONS #include #include @@ -199,7 +206,7 @@ void raise_error(const char* pfunction, const char* pmessage, const T& val) #endif template -inline T raise_domain_error( +BOOST_MATH_GPU_ENABLED inline T raise_domain_error( const char* function, const char* message, const T& val, @@ -210,12 +217,12 @@ inline T raise_domain_error( #else raise_error(function, message, val); // we never get here: - return std::numeric_limits::quiet_NaN(); + return boost::math::numeric_limits::quiet_NaN(); #endif } template -inline constexpr T raise_domain_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_domain_error( const char* , const char* , const T& , @@ -223,11 +230,11 @@ inline constexpr T raise_domain_error( { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: - return std::numeric_limits::quiet_NaN(); + return boost::math::numeric_limits::quiet_NaN(); } template -inline T raise_domain_error( +BOOST_MATH_GPU_ENABLED inline T raise_domain_error( const char* , const char* , const T& , @@ -236,11 +243,11 @@ inline T raise_domain_error( errno = EDOM; // This may or may not do the right thing, but the user asked for the error // to be silent so here we go anyway: - return std::numeric_limits::quiet_NaN(); + return boost::math::numeric_limits::quiet_NaN(); } template -inline T raise_domain_error( +BOOST_MATH_GPU_ENABLED inline T raise_domain_error( const char* function, const char* message, const T& val, @@ -250,7 +257,7 @@ inline T raise_domain_error( } template -inline T raise_pole_error( +BOOST_MATH_GPU_ENABLED inline T raise_pole_error( const char* function, const char* message, const T& val, @@ -264,7 +271,7 @@ inline T raise_pole_error( } template -inline constexpr T raise_pole_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_pole_error( const char* function, const char* message, const T& val, @@ -274,7 +281,7 @@ inline constexpr T raise_pole_error( } template -inline constexpr T raise_pole_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_pole_error( const char* function, const char* message, const T& val, @@ -284,7 +291,7 @@ inline constexpr T raise_pole_error( } template -inline T raise_pole_error( +BOOST_MATH_GPU_ENABLED inline T raise_pole_error( const char* function, const char* message, const T& val, @@ -294,7 +301,7 @@ inline T raise_pole_error( } template -inline T raise_overflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* function, const char* message, const ::boost::math::policies::overflow_error< ::boost::math::policies::throw_on_error>&) @@ -304,12 +311,12 @@ inline T raise_overflow_error( #else raise_error(function, message ? message : "numeric overflow"); // We should never get here: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? 
boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); #endif } template -inline T raise_overflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* function, const char* message, const T& val, @@ -320,23 +327,23 @@ inline T raise_overflow_error( #else raise_error(function, message ? message : "numeric overflow", val); // We should never get here: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); #endif } template -inline constexpr T raise_overflow_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error( const char* , const char* , const ::boost::math::policies::overflow_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T) { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); } template -inline constexpr T raise_overflow_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error( const char* , const char* , const T&, @@ -344,11 +351,11 @@ inline constexpr T raise_overflow_error( { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); } template -inline T raise_overflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* , const char* , const ::boost::math::policies::overflow_error< ::boost::math::policies::errno_on_error>&) BOOST_MATH_NOEXCEPT(T) @@ -356,11 +363,11 @@ inline T raise_overflow_error( errno = ERANGE; // This may or may not do the right thing, but the user asked for the error // to be silent so here we go anyway: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); } template -inline T raise_overflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* , const char* , const T&, @@ -369,20 +376,20 @@ inline T raise_overflow_error( errno = ERANGE; // This may or may not do the right thing, but the user asked for the error // to be silent so here we go anyway: - return std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : boost::math::tools::max_value(); + return boost::math::numeric_limits::has_infinity ? 
boost::math::numeric_limits::infinity() : boost::math::tools::max_value(); } template -inline T raise_overflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* function, const char* message, const ::boost::math::policies::overflow_error< ::boost::math::policies::user_error>&) { - return user_overflow_error(function, message, std::numeric_limits::infinity()); + return user_overflow_error(function, message, boost::math::numeric_limits::infinity()); } template -inline T raise_overflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_overflow_error( const char* function, const char* message, const T& val, @@ -392,11 +399,11 @@ inline T raise_overflow_error( std::string sval = prec_format(val); replace_all_in_string(m, "%1%", sval.c_str()); - return user_overflow_error(function, m.c_str(), std::numeric_limits::infinity()); + return user_overflow_error(function, m.c_str(), boost::math::numeric_limits::infinity()); } template -inline T raise_underflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_underflow_error( const char* function, const char* message, const ::boost::math::policies::underflow_error< ::boost::math::policies::throw_on_error>&) @@ -411,7 +418,7 @@ inline T raise_underflow_error( } template -inline constexpr T raise_underflow_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_underflow_error( const char* , const char* , const ::boost::math::policies::underflow_error< ::boost::math::policies::ignore_error>&) BOOST_MATH_NOEXCEPT(T) @@ -422,7 +429,7 @@ inline constexpr T raise_underflow_error( } template -inline T raise_underflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_underflow_error( const char* /* function */, const char* /* message */, const ::boost::math::policies::underflow_error< ::boost::math::policies::errno_on_error>&) BOOST_MATH_NOEXCEPT(T) @@ -434,7 +441,7 @@ inline T raise_underflow_error( } template -inline T raise_underflow_error( +BOOST_MATH_GPU_ENABLED inline T raise_underflow_error( const char* function, const char* message, const ::boost::math::policies::underflow_error< ::boost::math::policies::user_error>&) @@ -443,7 +450,7 @@ inline T raise_underflow_error( } template -inline T raise_denorm_error( +BOOST_MATH_GPU_ENABLED inline T raise_denorm_error( const char* function, const char* message, const T& /* val */, @@ -459,7 +466,7 @@ inline T raise_denorm_error( } template -inline constexpr T raise_denorm_error( +BOOST_MATH_GPU_ENABLED inline constexpr T raise_denorm_error( const char* , const char* , const T& val, @@ -471,7 +478,7 @@ inline constexpr T raise_denorm_error( } template -inline T raise_denorm_error( +BOOST_MATH_GPU_ENABLED inline T raise_denorm_error( const char* , const char* , const T& val, @@ -484,7 +491,7 @@ inline T raise_denorm_error( } template -inline T raise_denorm_error( +BOOST_MATH_GPU_ENABLED inline T raise_denorm_error( const char* function, const char* message, const T& val, @@ -494,7 +501,7 @@ inline T raise_denorm_error( } template -inline T raise_evaluation_error( +BOOST_MATH_GPU_ENABLED inline T raise_evaluation_error( const char* function, const char* message, const T& val, @@ -510,7 +517,7 @@ inline T raise_evaluation_error( } template -inline constexpr T raise_evaluation_error( +BOOST_MATH_GPU_ENABLED constexpr T raise_evaluation_error( const char* , const char* , const T& val, @@ -522,7 +529,7 @@ inline constexpr T raise_evaluation_error( } template -inline T raise_evaluation_error( +BOOST_MATH_GPU_ENABLED inline T raise_evaluation_error( const char* , const char* , const T& val, @@ -535,7 +542,7 @@ 
inline T raise_evaluation_error( } template -inline T raise_evaluation_error( +BOOST_MATH_GPU_ENABLED inline T raise_evaluation_error( const char* function, const char* message, const T& val, @@ -545,7 +552,7 @@ inline T raise_evaluation_error( } template -inline TargetType raise_rounding_error( +BOOST_MATH_GPU_ENABLED inline TargetType raise_rounding_error( const char* function, const char* message, const T& val, @@ -562,7 +569,7 @@ inline TargetType raise_rounding_error( } template -inline constexpr TargetType raise_rounding_error( +BOOST_MATH_GPU_ENABLED constexpr TargetType raise_rounding_error( const char* , const char* , const T& val, @@ -571,12 +578,12 @@ inline constexpr TargetType raise_rounding_error( { // This may or may not do the right thing, but the user asked for the error // to be ignored so here we go anyway: - static_assert(std::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); - return val > 0 ? (std::numeric_limits::max)() : (std::numeric_limits::is_integer ? (std::numeric_limits::min)() : -(std::numeric_limits::max)()); + static_assert(boost::math::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); + return val > 0 ? (boost::math::numeric_limits::max)() : (boost::math::numeric_limits::is_integer ? (boost::math::numeric_limits::min)() : -(boost::math::numeric_limits::max)()); } template -inline TargetType raise_rounding_error( +BOOST_MATH_GPU_ENABLED inline TargetType raise_rounding_error( const char* , const char* , const T& val, @@ -586,11 +593,11 @@ inline TargetType raise_rounding_error( errno = ERANGE; // This may or may not do the right thing, but the user asked for the error // to be silent so here we go anyway: - static_assert(std::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); - return val > 0 ? (std::numeric_limits::max)() : (std::numeric_limits::is_integer ? (std::numeric_limits::min)() : -(std::numeric_limits::max)()); + static_assert(boost::math::numeric_limits::is_specialized, "The target type must have std::numeric_limits specialized."); + return val > 0 ? (boost::math::numeric_limits::max)() : (boost::math::numeric_limits::is_integer ? 
(boost::math::numeric_limits::min)() : -(boost::math::numeric_limits::max)()); } template -inline TargetType raise_rounding_error( +BOOST_MATH_GPU_ENABLED inline TargetType raise_rounding_error( const char* function, const char* message, const T& val, @@ -601,7 +608,7 @@ inline TargetType raise_rounding_error( } template -inline T raise_indeterminate_result_error( +BOOST_MATH_GPU_ENABLED inline T raise_indeterminate_result_error( const char* function, const char* message, const T& val, @@ -613,12 +620,12 @@ inline T raise_indeterminate_result_error( #else raise_error(function, message, val); // we never get here: - return std::numeric_limits::quiet_NaN(); + return boost::math::numeric_limits::quiet_NaN(); #endif } template -inline constexpr T raise_indeterminate_result_error( +BOOST_MATH_GPU_ENABLED inline constexpr T raise_indeterminate_result_error( const char* , const char* , const T& , @@ -631,7 +638,7 @@ inline constexpr T raise_indeterminate_result_error( } template -inline T raise_indeterminate_result_error( +BOOST_MATH_GPU_ENABLED inline T raise_indeterminate_result_error( const char* , const char* , const T& , @@ -645,7 +652,7 @@ inline T raise_indeterminate_result_error( } template -inline T raise_indeterminate_result_error( +BOOST_MATH_GPU_ENABLED inline T raise_indeterminate_result_error( const char* function, const char* message, const T& val, @@ -658,7 +665,7 @@ inline T raise_indeterminate_result_error( } // namespace detail template -inline constexpr T raise_domain_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_domain_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::domain_error_type policy_type; return detail::raise_domain_error( @@ -667,7 +674,7 @@ inline constexpr T raise_domain_error(const char* function, const char* message, } template -inline constexpr T raise_pole_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_pole_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::pole_error_type policy_type; return detail::raise_pole_error( @@ -676,7 +683,7 @@ inline constexpr T raise_pole_error(const char* function, const char* message, c } template -inline constexpr T raise_overflow_error(const char* function, const char* message, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error(const char* function, const char* message, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::overflow_error_type policy_type; return detail::raise_overflow_error( @@ -685,7 +692,7 @@ inline constexpr T raise_overflow_error(const char* function, const char* messag } template -inline constexpr T raise_overflow_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && 
BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::overflow_error_type policy_type; return detail::raise_overflow_error( @@ -694,7 +701,7 @@ inline constexpr T raise_overflow_error(const char* function, const char* messag } template -inline constexpr T raise_underflow_error(const char* function, const char* message, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_underflow_error(const char* function, const char* message, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::underflow_error_type policy_type; return detail::raise_underflow_error( @@ -703,7 +710,7 @@ inline constexpr T raise_underflow_error(const char* function, const char* messa } template -inline constexpr T raise_denorm_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_denorm_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::denorm_error_type policy_type; return detail::raise_denorm_error( @@ -713,7 +720,7 @@ inline constexpr T raise_denorm_error(const char* function, const char* message, } template -inline constexpr T raise_evaluation_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_evaluation_error(const char* function, const char* message, const T& val, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::evaluation_error_type policy_type; return detail::raise_evaluation_error( @@ -722,7 +729,7 @@ inline constexpr T raise_evaluation_error(const char* function, const char* mess } template -inline constexpr TargetType raise_rounding_error(const char* function, const char* message, const T& val, const TargetType& t, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr TargetType raise_rounding_error(const char* function, const char* message, const T& val, const TargetType& t, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::rounding_error_type policy_type; return detail::raise_rounding_error( @@ -731,7 +738,7 @@ inline constexpr TargetType raise_rounding_error(const char* function, const cha } template -inline constexpr T raise_indeterminate_result_error(const char* function, const char* message, const T& val, const R& result, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED constexpr T raise_indeterminate_result_error(const char* function, const char* message, const T& val, const R& result, const Policy&) noexcept(is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T)) { typedef typename Policy::indeterminate_result_error_type policy_type; return detail::raise_indeterminate_result_error( @@ -746,7 +753,7 @@ namespace detail { template -BOOST_MATH_FORCEINLINE bool check_overflow(T val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE bool check_overflow(T val, R* result, const char* 
function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) { BOOST_MATH_STD_USING if(fabs(val) > tools::max_value()) @@ -758,7 +765,7 @@ BOOST_MATH_FORCEINLINE bool check_overflow(T val, R* result, const char* functio return false; } template -BOOST_MATH_FORCEINLINE bool check_overflow(std::complex val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE bool check_overflow(std::complex val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) { typedef typename R::value_type r_type; r_type re, im; @@ -768,7 +775,7 @@ BOOST_MATH_FORCEINLINE bool check_overflow(std::complex val, R* result, const return r; } template -BOOST_MATH_FORCEINLINE bool check_underflow(T val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE bool check_underflow(T val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) { if((val != 0) && (static_cast(val) == 0)) { @@ -778,7 +785,7 @@ BOOST_MATH_FORCEINLINE bool check_underflow(T val, R* result, const char* functi return false; } template -BOOST_MATH_FORCEINLINE bool check_underflow(std::complex val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE bool check_underflow(std::complex val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) { typedef typename R::value_type r_type; r_type re, im; @@ -788,7 +795,7 @@ BOOST_MATH_FORCEINLINE bool check_underflow(std::complex val, R* result, cons return r; } template -BOOST_MATH_FORCEINLINE bool check_denorm(T val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE bool check_denorm(T val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) { BOOST_MATH_STD_USING if((fabs(val) < static_cast(tools::min_value())) && (static_cast(val) != 0)) @@ -799,7 +806,7 @@ BOOST_MATH_FORCEINLINE bool check_denorm(T val, R* result, const char* function, return false; } template -BOOST_MATH_FORCEINLINE bool check_denorm(std::complex val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) && (Policy::value != user_error)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE bool check_denorm(std::complex val, R* result, const char* function, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && (Policy::value != throw_on_error) 
&& (Policy::value != user_error)) { typedef typename R::value_type r_type; r_type re, im; @@ -811,28 +818,28 @@ BOOST_MATH_FORCEINLINE bool check_denorm(std::complex val, R* result, const c // Default instantiations with ignore_error policy. template -BOOST_MATH_FORCEINLINE constexpr bool check_overflow(T /* val */, R* /* result */, const char* /* function */, const overflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE constexpr bool check_overflow(T /* val */, R* /* result */, const char* /* function */, const overflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -BOOST_MATH_FORCEINLINE constexpr bool check_overflow(std::complex /* val */, R* /* result */, const char* /* function */, const overflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE constexpr bool check_overflow(std::complex /* val */, R* /* result */, const char* /* function */, const overflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -BOOST_MATH_FORCEINLINE constexpr bool check_underflow(T /* val */, R* /* result */, const char* /* function */, const underflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE constexpr bool check_underflow(T /* val */, R* /* result */, const char* /* function */, const underflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -BOOST_MATH_FORCEINLINE constexpr bool check_underflow(std::complex /* val */, R* /* result */, const char* /* function */, const underflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE constexpr bool check_underflow(std::complex /* val */, R* /* result */, const char* /* function */, const underflow_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -BOOST_MATH_FORCEINLINE constexpr bool check_denorm(T /* val */, R* /* result*/, const char* /* function */, const denorm_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE constexpr bool check_denorm(T /* val */, R* /* result*/, const char* /* function */, const denorm_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } template -BOOST_MATH_FORCEINLINE constexpr bool check_denorm(std::complex /* val */, R* /* result*/, const char* /* function */, const denorm_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE constexpr bool check_denorm(std::complex /* val */, R* /* result*/, const char* /* function */, const denorm_error&) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T)) { return false; } } // namespace detail template -BOOST_MATH_FORCEINLINE R checked_narrowing_cast(T val, const char* function) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy::value) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE R checked_narrowing_cast(T val, const char* function) noexcept(BOOST_MATH_IS_FLOAT(R) && BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy::value) { typedef typename Policy::overflow_error_type overflow_type; typedef typename Policy::underflow_error_type underflow_type; @@ -852,7 +859,7 @@ BOOST_MATH_FORCEINLINE R checked_narrowing_cast(T val, const char* function) noe } 
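All of the raise_* overloads above dispatch on the policy's error type; the user_error variants call into user-supplied hooks. A sketch of providing one (the hook name user_domain_error and the declare-before-including ordering follow the documented Boost.Math convention; the policy alias name is mine):

```cpp
#include <boost/math/policies/policy.hpp>
#include <iostream>
#include <limits>

// The hook must be visible before the distribution headers are included,
// per the Boost.Math error-handling documentation.
namespace boost { namespace math { namespace policies {
template <class T>
T user_domain_error(const char* function, const char* message, const T& /*val*/)
{
    std::cerr << "domain error in " << function << ": " << message << '\n';
    return std::numeric_limits<T>::quiet_NaN();
}
}}} // namespace boost::math::policies

#include <boost/math/distributions/students_t.hpp>

int main()
{
    using namespace boost::math;
    using user_policy = policies::policy<policies::domain_error<policies::user_error>>;

    // Invalid df: the user hook fires instead of a throw, and the
    // distribution is constructed with the NaN the hook returned.
    students_t_distribution<double, user_policy> t(-1.0);
    (void)t;
}
```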
@@ -852,7 +859,7 @@ BOOST_MATH_FORCEINLINE R checked_narrowing_cast(T val, const char* function) noe
 }
 template <class T, class Policy>
-inline void check_series_iterations(const char* function, std::uintmax_t max_iter, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy<Policy>::value)
+BOOST_MATH_GPU_ENABLED inline void check_series_iterations(const char* function, std::uintmax_t max_iter, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy<Policy>::value)
 {
    if(max_iter >= policies::get_max_series_iterations<Policy>())
       raise_evaluation_error<T>(
@@ -861,7 +868,7 @@ inline void check_series_iterations(const char* function, std::uintmax_t max_ite
 }
 
 template <class T, class Policy>
-inline void check_root_iterations(const char* function, std::uintmax_t max_iter, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy<Policy>::value)
+BOOST_MATH_GPU_ENABLED inline void check_root_iterations(const char* function, std::uintmax_t max_iter, const Policy& pol) noexcept(BOOST_MATH_IS_FLOAT(T) && is_noexcept_error_policy<Policy>::value)
 {
    if(max_iter >= policies::get_max_root_iterations<Policy>())
       raise_evaluation_error<T>(
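For context (annotation, not part of the patch): the caps consulted here come straight from the policy's max_series_iterations_type / max_root_iterations_type, so the GPU marking does not alter the limits. A minimal sketch, assuming the stock configuration macros are untouched:

    #include <boost/math/policies/policy.hpp>

    // Defaults are BOOST_MATH_MAX_SERIES_ITERATION_POLICY (1000000)
    // and BOOST_MATH_MAX_ROOT_ITERATION_POLICY (200).
    constexpr unsigned long series_cap =
       boost::math::policies::get_max_series_iterations<boost::math::policies::policy<>>();
    constexpr unsigned long root_cap =
       boost::math::policies::get_max_root_iterations<boost::math::policies::policy<>>();
    static_assert(series_cap == 1000000UL, "stock series cap");
    static_assert(root_cap == 200UL, "stock root-finding cap");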
@@ -871,25 +878,169 @@
 } //namespace policies
 
-namespace detail{
+#ifdef _MSC_VER
+#  pragma warning(pop)
+#endif
+
+}} // namespaces boost/math
+
+#else // Special values for NVRTC
+
+namespace boost {
+namespace math {
+namespace policies {
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr T raise_domain_error(
+           const char* ,
+           const char* ,
+           const T& ,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return boost::math::numeric_limits<T>::quiet_NaN();
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr T raise_pole_error(
+           const char* function,
+           const char* message,
+           const T& val,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   return boost::math::numeric_limits<T>::quiet_NaN();
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error(
+           const char* ,
+           const char* ,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return boost::math::numeric_limits<T>::has_infinity ? boost::math::numeric_limits<T>::infinity() : (boost::math::numeric_limits<T>::max)();
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr T raise_overflow_error(
+           const char* ,
+           const char* ,
+           const T&,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return boost::math::numeric_limits<T>::has_infinity ? boost::math::numeric_limits<T>::infinity() : (boost::math::numeric_limits<T>::max)();
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr T raise_underflow_error(
+           const char* ,
+           const char* ,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return static_cast<T>(0);
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED inline constexpr T raise_denorm_error(
+           const char* ,
+           const char* ,
+           const T& val,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return val;
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr T raise_evaluation_error(
+           const char* ,
+           const char* ,
+           const T& val,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return val;
+}
+
+template <class T, class TargetType, class Policy>
+BOOST_MATH_GPU_ENABLED constexpr TargetType raise_rounding_error(
+           const char* ,
+           const char* ,
+           const T& val,
+           const TargetType&,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   static_assert(boost::math::numeric_limits<TargetType>::is_specialized, "The target type must have std::numeric_limits specialized.");
+   return val > 0 ? (boost::math::numeric_limits<TargetType>::max)() : (boost::math::numeric_limits<TargetType>::is_integer ? (boost::math::numeric_limits<TargetType>::min)() : -(boost::math::numeric_limits<TargetType>::max)());
+}
+
+template <class T, class R, class Policy>
+BOOST_MATH_GPU_ENABLED inline constexpr T raise_indeterminate_result_error(
+           const char* ,
+           const char* ,
+           const T& ,
+           const R& result,
+           const Policy&) BOOST_MATH_NOEXCEPT(T)
+{
+   // This may or may not do the right thing, but the user asked for the error
+   // to be ignored so here we go anyway:
+   return result;
+}
+
+template <class R, class T, class Policy>
+BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE R checked_narrowing_cast(T val, const char* function) noexcept(boost::math::is_floating_point_v<R> && boost::math::is_floating_point_v<T>)
+{
+   // We only have ignore error policy so no reason to check
+   return static_cast<R>(val);
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED inline void check_series_iterations(const char* function, boost::math::uintmax_t max_iter, const Policy& pol) noexcept(boost::math::is_floating_point_v<T>)
+{
+   if(max_iter >= policies::get_max_series_iterations<Policy>())
+      raise_evaluation_error<T>(
+         function,
+         "Series evaluation exceeded %1% iterations, giving up now.", static_cast<T>(static_cast<double>(max_iter)), pol);
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED inline void check_root_iterations(const char* function, boost::math::uintmax_t max_iter, const Policy& pol) noexcept(boost::math::is_floating_point_v<T>)
+{
+   if(max_iter >= policies::get_max_root_iterations<Policy>())
+      raise_evaluation_error<T>(
+         function,
+         "Root finding evaluation exceeded %1% iterations, giving up now.", static_cast<T>(static_cast<double>(max_iter)), pol);
+}
+
+} // namespace policies
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_HAS_NVRTC
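For context (annotation, not part of the patch): NVRTC-compiled device code has no exceptions, so the fallback handlers above make failures visible only through the returned value (quiet NaN for domain errors, infinity/max for overflow). Callers therefore test the result, along these lines. Illustrative CUDA sketch only; the kernel and the choice of boost::math::lgamma are hypothetical and not taken from the patch:

    __global__ void lgamma_kernel(const double* in, double* out, int n)
    {
       int i = blockIdx.x * blockDim.x + threadIdx.x;
       if (i < n)
       {
          // Assumes lgamma is built for device in this configuration.
          double r = boost::math::lgamma(in[i]);
          // raise_domain_error returned quiet_NaN; r != r is the NaN test.
          out[i] = (r != r) ? 0.0 : r;
       }
    }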
+namespace boost { namespace math { namespace detail {
+
 //
 // Simple helper function to assist in returning a pair from a single value,
 // that value usually comes from one of the error handlers above:
 //
 template <class T>
-std::pair<T, T> pair_from_single(const T& val) BOOST_MATH_NOEXCEPT(T)
+BOOST_MATH_GPU_ENABLED boost::math::pair<T, T> pair_from_single(const T& val) BOOST_MATH_NOEXCEPT(T)
 {
-   return std::make_pair(val, val);
+   return boost::math::make_pair(val, val);
 }
 
-}
-
-#ifdef _MSC_VER
-#  pragma warning(pop)
-#endif
-
-}} // namespaces boost/math
+}}} // boost::math::detail
 
 #endif // BOOST_MATH_POLICY_ERROR_HANDLING_HPP
diff --git a/include/boost/math/policies/policy.hpp b/include/boost/math/policies/policy.hpp
index eb09682e32..ec7b36f2d5 100644
--- a/include/boost/math/policies/policy.hpp
+++ b/include/boost/math/policies/policy.hpp
@@ -9,11 +9,9 @@
 #include <boost/math/tools/config.hpp>
 #include <boost/math/tools/mp.hpp>
-#include <limits>
-#include <type_traits>
-#include <cmath>
-#include <cstdint>
-#include <cstddef>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/tools/type_traits.hpp>
+#include <boost/math/tools/cstdint.hpp>
 
 namespace boost{ namespace math{
@@ -22,9 +20,9 @@ namespace mp = tools::meta_programming;
 namespace tools{
 
 template <class T>
-constexpr int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept;
+BOOST_MATH_GPU_ENABLED constexpr int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept;
 template <class T>
-constexpr T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point<T>::value);
+BOOST_MATH_GPU_ENABLED constexpr T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point<T>::value);
 
 }
@@ -33,6 +31,33 @@ namespace policies{
 //
 // Define macros for our default policies, if they're not defined already:
 //
+
+
+//
+// Generic support for GPUs
+//
+#ifdef BOOST_MATH_HAS_GPU_SUPPORT
+# ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
+#  define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+# endif
+# ifndef BOOST_MATH_PROMOTE_DOUBLE_POLICY
+#  define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+# endif
+# ifndef BOOST_MATH_DOMAIN_ERROR_POLICY
+#  define BOOST_MATH_DOMAIN_ERROR_POLICY ignore_error
+# endif
+# ifndef BOOST_MATH_POLE_ERROR_POLICY
+#  define BOOST_MATH_POLE_ERROR_POLICY ignore_error
+# endif
+# ifndef BOOST_MATH_EVALUATION_ERROR_POLICY
+#  define BOOST_MATH_EVALUATION_ERROR_POLICY ignore_error
+# endif
+# ifndef BOOST_MATH_ROUNDING_ERROR_POLICY
+#  define BOOST_MATH_ROUNDING_ERROR_POLICY ignore_error
+# endif
+#endif
+
+//
 // Special cases for exceptions disabled first:
 //
 #ifdef BOOST_MATH_NO_EXCEPTIONS
@@ -107,20 +132,20 @@ namespace policies{
 #define BOOST_MATH_META_INT(Type, name, Default) \
    template <Type N = Default> \
-   class name : public std::integral_constant<int, N>{}; \
+   class name : public boost::math::integral_constant<int, N>{}; \
    \
    namespace detail{ \
    template <Type N> \
-   char test_is_valid_arg(const name<N>* = nullptr); \
-   char test_is_default_arg(const name<Default>* = nullptr); \
+   BOOST_MATH_GPU_ENABLED char test_is_valid_arg(const name<N>* = nullptr); \
+   BOOST_MATH_GPU_ENABLED char test_is_default_arg(const name<Default>* = nullptr); \
    \
    template <typename T> \
    class is_##name##_imp \
    { \
    private: \
       template <Type N> \
-      static char test(const name<N>* = nullptr); \
-      static double test(...); \
+      BOOST_MATH_GPU_ENABLED static char test(const name<N>* = nullptr); \
+      BOOST_MATH_GPU_ENABLED static double test(...); \
    public: \
       static constexpr bool value = sizeof(test(static_cast<T*>(nullptr))) == sizeof(char); \
    }; \
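For context (annotation, not part of the patch): BOOST_MATH_META_INT, whose definition continues in the next hunk, stamps out each integer-valued policy tag (domain_error<>, pole_error<>, overflow_error<>, ...) together with an is_xxx detection trait, so the BOOST_MATH_GPU_ENABLED markings added here propagate to every policy class the macro generates. A sketch of a generated trait in use (the domain_error instantiation of the macro is assumed, it is not shown in this hunk):

    static_assert(boost::math::policies::is_domain_error<
                     boost::math::policies::domain_error<boost::math::policies::throw_on_error>>::value,
                  "domain_error<...> is recognised by its generated trait");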
@@ -131,27 +156,27 @@ namespace policies{
    { \
    public: \
       static constexpr bool value = boost::math::policies::detail::is_##name##_imp<T>::value; \
-      using type = std::integral_constant<bool, value>; \
+      using type = boost::math::integral_constant<bool, value>; \
    };
 
 #define BOOST_MATH_META_BOOL(name, Default) \
    template <bool N = Default> \
-   class name : public std::integral_constant<bool, N>{}; \
+   class name : public boost::math::integral_constant<bool, N>{}; \
    \
    namespace detail{ \
    template <bool N> \
-   char test_is_valid_arg(const name<N>* = nullptr); \
-   char test_is_default_arg(const name<Default>* = nullptr); \
+   BOOST_MATH_GPU_ENABLED char test_is_valid_arg(const name<N>* = nullptr); \
+   BOOST_MATH_GPU_ENABLED char test_is_default_arg(const name<Default>* = nullptr); \
    \
    template <typename T> \
    class is_##name##_imp \
    { \
    private: \
       template <bool N> \
-      static char test(const name<N>* = nullptr); \
-      static double test(...); \
+      BOOST_MATH_GPU_ENABLED static char test(const name<N>* = nullptr); \
+      BOOST_MATH_GPU_ENABLED static double test(...); \
    public: \
-      static constexpr bool value = sizeof(test(static_cast<T*>(nullptr))) == sizeof(char); \
+      static constexpr bool value = sizeof(test(static_cast<T*>(nullptr))) == sizeof(char); \
    }; \
    } \
    \
@@ -160,7 +185,7 @@ namespace policies{
    { \
    public: \
       static constexpr bool value = boost::math::policies::detail::is_##name##_imp<T>::value; \
-      using type = std::integral_constant<bool, value>; \
+      using type = boost::math::integral_constant<bool, value>; \
    };
 
 //
@@ -232,27 +257,27 @@ struct precision
    //
    // Now work out the precision:
    //
-   using digits2_type = typename std::conditional<
+   using digits2_type = typename boost::math::conditional<
       (Digits10::value == 0),
       digits2<0>,
       digits2<((Digits10::value + 1) * 1000L) / 301L>
    >::type;
 public:
 #ifdef BOOST_BORLANDC
-   using type = typename std::conditional<
+   using type = typename boost::math::conditional<
       (Digits2::value > ::boost::math::policies::detail::precision<Digits10, Digits2>::digits2_type::value),
       Digits2, digits2_type>::type;
 #else
-   using type = typename std::conditional<
+   using type = typename boost::math::conditional<
       (Digits2::value > digits2_type::value),
       Digits2, digits2_type>::type;
 #endif
 };
 
-double test_is_valid_arg(...);
-double test_is_default_arg(...);
-char test_is_valid_arg(const default_policy*);
-char test_is_default_arg(const default_policy*);
+BOOST_MATH_GPU_ENABLED double test_is_valid_arg(...);
+BOOST_MATH_GPU_ENABLED double test_is_default_arg(...);
+BOOST_MATH_GPU_ENABLED char test_is_valid_arg(const default_policy*);
+BOOST_MATH_GPU_ENABLED char test_is_default_arg(const default_policy*);
 
 template <class T>
 class is_valid_policy_imp
@@ -280,7 +305,7 @@ class is_default_policy
 {
 public:
    static constexpr bool value = boost::math::policies::detail::is_default_policy_imp<T>::value;
-   using type = std::integral_constant<bool, value>;
+   using type = boost::math::integral_constant<bool, value>;
 
    template <class V>
    struct apply
@@ -289,7 +314,7 @@ class is_default_policy
    };
 };
 
-template <class L, class T, std::size_t N>
+template <class L, class T, boost::math::size_t N>
 struct append_N
 {
    using type = typename append_N<mp::mp_push_back<L, T>, T, N-1>::type;
@@ -378,7 +403,7 @@ class policy
    //
    // Typelist of the arguments:
    //
    using arg_list = mp::mp_list<A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13>;
-   static constexpr std::size_t arg_list_size = mp::mp_size<arg_list>::value;
+   static constexpr boost::math::size_t arg_list_size = mp::mp_size<arg_list>::value;
 
    template <...>
    struct pick_arg
@@ -509,7 +534,7 @@ class normalise
 {
 private:
    using arg_list = mp::mp_list<A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13>;
-   static constexpr std::size_t arg_list_size = mp::mp_size<arg_list>::value;
+   static constexpr boost::math::size_t arg_list_size = mp::mp_size<arg_list>::value;
 
    template <...>
    struct pick_arg
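For context (annotation, not part of the patch): normalise<>, just touched above, is what lets make_policy and policy<> accept arguments in any order and slot them into fixed positions; the change to boost::math::size_t only swaps the counting type. A sketch of the invariant it provides:

    #include <boost/math/policies/policy.hpp>
    #include <type_traits>

    using namespace boost::math::policies;
    // Argument order does not matter once normalised:
    using p1 = normalise<policy<>, domain_error<errno_on_error>, promote_double<false>>::type;
    using p2 = normalise<policy<>, promote_double<false>, domain_error<errno_on_error>>::type;
    static_assert(std::is_same<p1, p2>::value, "normalise is order-insensitive");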
@@ -640,81 +665,81 @@ struct normalise
    using type = policy<...>;
 };
 
-inline constexpr policy<> make_policy() noexcept
+BOOST_MATH_GPU_ENABLED constexpr policy<> make_policy() noexcept
 { return {}; }
 
 template <class A1>
-inline constexpr typename normalise<policy<>, A1>::type make_policy(const A1&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1>::type make_policy(const A1&) noexcept
 {
    typedef typename normalise<policy<>, A1>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2>
-inline constexpr typename normalise<policy<>, A1, A2>::type make_policy(const A1&, const A2&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2>::type make_policy(const A1&, const A2&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3>
-inline constexpr typename normalise<policy<>, A1, A2, A3>::type make_policy(const A1&, const A2&, const A3&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3>::type make_policy(const A1&, const A2&, const A3&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4>::type make_policy(const A1&, const A2&, const A3&, const A4&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4>::type make_policy(const A1&, const A2&, const A3&, const A4&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5, class A6>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5, A6>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5, class A6, class A7>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&, const A9&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&, const A9&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9, class A10>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&, const A9&, const A10&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&, const A9&, const A10&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10>::type result_type;
    return result_type();
 }
 
 template <class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9, class A10, class A11>
-inline constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&, const A9&, const A10&, const A11&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11>::type make_policy(const A1&, const A2&, const A3&, const A4&, const A5&, const A6&, const A7&, const A8&, const A9&, const A10&, const A11&) noexcept
 {
    typedef typename normalise<policy<>, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11>::type result_type;
    return result_type();
@@ -732,47 +757,47 @@ struct evaluation
 
 template <class Policy>
 struct evaluation<float, Policy>
 {
-   using type = typename std::conditional<Policy::promote_float_type::value, double, float>::type;
+   using type = typename boost::math::conditional<Policy::promote_float_type::value, double, float>::type;
 };
 
 template <class Policy>
 struct evaluation<double, Policy>
 {
-   using type = typename std::conditional<Policy::promote_double_type::value, long double, double>::type;
+   using type = typename boost::math::conditional<Policy::promote_double_type::value, long double, double>::type;
 };
 
 template <class T, class Policy>
 struct precision
 {
-   static_assert((std::numeric_limits<T>::radix == 2) || ((std::numeric_limits<T>::is_specialized == 0) || (std::numeric_limits<T>::digits == 0)),
-   "(std::numeric_limits<T>::radix == 2) || ((std::numeric_limits<T>::is_specialized == 0) || (std::numeric_limits<T>::digits == 0))");
+   static_assert((boost::math::numeric_limits<T>::radix == 2) || ((boost::math::numeric_limits<T>::is_specialized == 0) || (boost::math::numeric_limits<T>::digits == 0)),
+   "(boost::math::numeric_limits<T>::radix == 2) || ((boost::math::numeric_limits<T>::is_specialized == 0) || (boost::math::numeric_limits<T>::digits == 0))");
 #ifndef BOOST_BORLANDC
    using precision_type = typename Policy::precision_type;
-   using type = typename std::conditional<
-      ((std::numeric_limits<T>::is_specialized == 0) || (std::numeric_limits<T>::digits == 0)),
+   using type = typename boost::math::conditional<
+      ((boost::math::numeric_limits<T>::is_specialized == 0) || (boost::math::numeric_limits<T>::digits == 0)),
       // Possibly unknown precision:
       precision_type,
-      typename std::conditional<
-         ((std::numeric_limits<T>::digits <= precision_type::value)
+      typename boost::math::conditional<
+         ((boost::math::numeric_limits<T>::digits <= precision_type::value)
          || (Policy::precision_type::value <= 0)),
         // Default case, full precision for RealType:
-        digits2< std::numeric_limits<T>::digits>,
+        digits2< boost::math::numeric_limits<T>::digits>,
        // User customised precision:
         precision_type
      >::type
   >::type;
 #else
   using precision_type = typename Policy::precision_type;
-   using digits_t = std::integral_constant<int, std::numeric_limits<T>::digits>;
-   using spec_t = std::integral_constant<bool, std::numeric_limits<T>::is_specialized>;
-   using type = typename std::conditional<
-      (spec_t::value == true std::true_type || digits_t::value == 0),
+   using digits_t = boost::math::integral_constant<int, boost::math::numeric_limits<T>::digits>;
+   using spec_t = boost::math::integral_constant<bool, boost::math::numeric_limits<T>::is_specialized>;
+   using type = typename boost::math::conditional<
+      (spec_t::value == true boost::math::true_type || digits_t::value == 0),
       // Possibly unknown precision:
       precision_type,
-      typename std::conditional<
+      typename boost::math::conditional<
        (digits_t::value <= precision_type::value || precision_type::value <= 0),
        // Default case, full precision for RealType:
-        digits2< std::numeric_limits<T>::digits>,
+        digits2< boost::math::numeric_limits<T>::digits>,
       // User customised precision:
        precision_type
     >::type
@@ -785,7 +810,7 @@ struct precision
 template <class Policy>
 struct precision<BOOST_MATH_FLOAT128_TYPE, Policy>
 {
-   typedef std::integral_constant<int, 113> type;
+   typedef boost::math::integral_constant<int, 113> type;
 };
 
 #endif
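For context (annotation, not part of the patch), a worked example of what the precision machinery above computes: the digits10 -> digits2 conversion is (D10 + 1) * 1000 / 301, because 1000/301 approximates log2(10). Requesting 15 decimal digits therefore yields (15 + 1) * 1000 / 301 = 53 binary digits, exactly an IEEE double:

    #include <boost/math/policies/policy.hpp>

    using pol15 = boost::math::policies::policy<boost::math::policies::digits10<15>>;
    static_assert(boost::math::policies::digits<double, pol15>() == 53,
                  "15 decimal digits == 53 bits for double");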
@@ -793,15 +818,15 @@ struct precision
 
 namespace detail{
 
 template <class T, class Policy>
-inline constexpr int digits_imp(std::true_type const&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr int digits_imp(boost::math::true_type const&) noexcept
 {
-   static_assert( std::numeric_limits<T>::is_specialized, "std::numeric_limits<T>::is_specialized");
+   static_assert( boost::math::numeric_limits<T>::is_specialized, "boost::math::numeric_limits<T>::is_specialized");
    typedef typename boost::math::policies::precision<T, Policy>::type p_t;
    return p_t::value;
 }
 
 template <class T, class Policy>
-inline constexpr int digits_imp(std::false_type const&) noexcept
+BOOST_MATH_GPU_ENABLED constexpr int digits_imp(boost::math::false_type const&) noexcept
 {
    return tools::digits<T>();
 }
@@ -809,26 +834,26 @@ inline constexpr int digits_imp(std::false_type const&) noexcept
 
 } // namespace detail
 
 template <class T, class Policy>
-inline constexpr int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept
+BOOST_MATH_GPU_ENABLED constexpr int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept
 {
-   typedef std::integral_constant<bool, std::numeric_limits<T>::is_specialized > tag_type;
+   typedef boost::math::integral_constant<bool, boost::math::numeric_limits<T>::is_specialized > tag_type;
    return detail::digits_imp<T, Policy>(tag_type());
 }
 template <class T, class Policy>
-inline constexpr int digits_base10(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept
+BOOST_MATH_GPU_ENABLED constexpr int digits_base10(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept
 {
    return boost::math::policies::digits<T, Policy>() * 301 / 1000L;
 }
 
 template <class Policy>
-inline constexpr unsigned long get_max_series_iterations() noexcept
+BOOST_MATH_GPU_ENABLED constexpr unsigned long get_max_series_iterations() noexcept
 {
    typedef typename Policy::max_series_iterations_type iter_type;
    return iter_type::value;
 }
 
 template <class Policy>
-inline constexpr unsigned long get_max_root_iterations() noexcept
+BOOST_MATH_GPU_ENABLED constexpr unsigned long get_max_root_iterations() noexcept
 {
    typedef typename Policy::max_root_iterations_type iter_type;
    return iter_type::value;
@@ -839,51 +864,51 @@ namespace detail{
 
 template <class T, class Digits, class Small, class Default>
 struct series_factor_calc
 {
-   static T get() noexcept(std::is_floating_point<T>::value)
+   BOOST_MATH_GPU_ENABLED static T get() noexcept(boost::math::is_floating_point<T>::value)
    {
       return ldexp(T(1.0), 1 - Digits::value);
    }
 };
 template <class T, class Digits>
-struct series_factor_calc<T, Digits, std::true_type, std::true_type>
+struct series_factor_calc<T, Digits, boost::math::true_type, boost::math::true_type>
 {
-   static constexpr T get() noexcept(std::is_floating_point<T>::value)
+   BOOST_MATH_GPU_ENABLED static constexpr T get() noexcept(boost::math::is_floating_point<T>::value)
    {
       return boost::math::tools::epsilon<T>();
    }
 };
 template <class T, class Digits>
-struct series_factor_calc<T, Digits, std::true_type, std::false_type>
+struct series_factor_calc<T, Digits, boost::math::true_type, boost::math::false_type>
 {
-   static constexpr T get() noexcept(std::is_floating_point<T>::value)
+   BOOST_MATH_GPU_ENABLED static constexpr T get() noexcept(boost::math::is_floating_point<T>::value)
    {
-      return 1 / static_cast<T>(static_cast<std::uintmax_t>(1u) << (Digits::value - 1));
+      return 1 / static_cast<T>(static_cast<boost::math::uintmax_t>(1u) << (Digits::value - 1));
    }
 };
 template <class T, class Digits>
-struct series_factor_calc<T, Digits, std::false_type, std::true_type>
+struct series_factor_calc<T, Digits, boost::math::false_type, boost::math::true_type>
 {
-   static constexpr T get() noexcept(std::is_floating_point<T>::value)
+   BOOST_MATH_GPU_ENABLED static constexpr T get() noexcept(boost::math::is_floating_point<T>::value)
    {
      return boost::math::tools::epsilon<T>();
    }
 };
 
 template <class T, class Policy>
-inline constexpr T get_epsilon_imp(std::true_type const&) noexcept(std::is_floating_point<T>::value)
+BOOST_MATH_GPU_ENABLED constexpr T get_epsilon_imp(boost::math::true_type const&) noexcept(boost::math::is_floating_point<T>::value)
 {
-   static_assert(std::numeric_limits<T>::is_specialized, "std::numeric_limits<T>::is_specialized");
-   static_assert(std::numeric_limits<T>::radix == 2, "std::numeric_limits<T>::radix == 2");
+   static_assert(boost::math::numeric_limits<T>::is_specialized, "boost::math::numeric_limits<T>::is_specialized");
+   static_assert(boost::math::numeric_limits<T>::radix == 2, "boost::math::numeric_limits<T>::radix == 2");
    typedef typename boost::math::policies::precision<T, Policy>::type p_t;
-   typedef std::integral_constant<bool, p_t::value <= std::numeric_limits<std::uintmax_t>::digits> is_small_int;
-   typedef std::integral_constant<bool, p_t::value >= std::numeric_limits<T>::digits> is_default_value;
+   typedef boost::math::integral_constant<bool, p_t::value <= boost::math::numeric_limits<boost::math::uintmax_t>::digits> is_small_int;
+   typedef boost::math::integral_constant<bool, p_t::value >= boost::math::numeric_limits<T>::digits> is_default_value;
    return series_factor_calc<T, p_t, is_small_int, is_default_value>::get();
 }
 
 template <class T, class Policy>
-inline constexpr T get_epsilon_imp(std::false_type const&) noexcept(std::is_floating_point<T>::value)
+BOOST_MATH_GPU_ENABLED constexpr T get_epsilon_imp(boost::math::false_type const&) noexcept(boost::math::is_floating_point<T>::value)
 {
    return tools::epsilon<T>();
 }
@@ -891,9 +916,9 @@ inline constexpr T get_epsilon_imp(std::false_type const&) noexcept(std::is_floa
 
 } // namespace detail
 
 template <class T, class Policy>
-inline constexpr T get_epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point<T>::value)
+BOOST_MATH_GPU_ENABLED constexpr T get_epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point<T>::value)
 {
-   typedef std::integral_constant<bool, (std::numeric_limits<T>::is_specialized && (std::numeric_limits<T>::radix == 2)) > tag_type;
+   typedef boost::math::integral_constant<bool, (boost::math::numeric_limits<T>::is_specialized && (boost::math::numeric_limits<T>::radix == 2)) > tag_type;
    return detail::get_epsilon_imp<T, Policy>(tag_type());
 }
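For context (annotation, not part of the patch): get_epsilon<T, Policy>() above is the policy-aware epsilon, machine epsilon under the default policy and ldexp(1, 1 - digits) when a policy narrows the working precision. Sketch:

    #include <boost/math/policies/policy.hpp>
    #include <limits>

    using def = boost::math::policies::policy<>;
    static_assert(boost::math::policies::get_epsilon<double, def>()
                     == std::numeric_limits<double>::epsilon(), "default: machine epsilon");

    // Reducing the precision widens the tolerance: digits2<24> gives 2^(1-24).
    using pol24 = boost::math::policies::policy<boost::math::policies::digits2<24>>;
    static_assert(boost::math::policies::get_epsilon<double, pol24>() == 0x1p-23, "");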

@@ -910,8 +935,8 @@ namespace detail{
 
 template <class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9, class A10, class A11, class A12, class A13>
-char test_is_policy(const policy<A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13>*);
-double test_is_policy(...);
+BOOST_MATH_GPU_ENABLED char test_is_policy(const policy<A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13>*);
+BOOST_MATH_GPU_ENABLED double test_is_policy(...);
 
 template <class P>
 class is_policy_imp
@@ -927,7 +952,7 @@ class is_policy
 {
 public:
    static constexpr bool value = boost::math::policies::detail::is_policy_imp<P>::value;
-   using type = std::integral_constant<bool, value>;
+   using type = boost::math::integral_constant<bool, value>;
 };
 
 //
@@ -937,20 +962,20 @@ struct constructor_error_check
 template <class Policy>
 struct constructor_error_check
 {
    using domain_error_type = typename Policy::domain_error_type;
-   using type = typename std::conditional<
+   using type = typename boost::math::conditional<
       (domain_error_type::value == throw_on_error) || (domain_error_type::value == user_error) || (domain_error_type::value == errno_on_error),
-      std::true_type,
-      std::false_type>::type;
+      boost::math::true_type,
+      boost::math::false_type>::type;
 };
 
 template <class Policy>
 struct method_error_check
 {
    using domain_error_type = typename Policy::domain_error_type;
-   using type = typename std::conditional<
+   using type = typename boost::math::conditional<
       (domain_error_type::value == throw_on_error),
-      std::false_type,
-      std::true_type>::type;
+      boost::math::false_type,
+      boost::math::true_type>::type;
 };
 //
 // Does the Policy ever throw on error?
diff --git a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp
index 2df07b6ecc..77f2fbf060 100644
--- a/include/boost/math/quadrature/detail/exp_sinh_detail.hpp
+++ b/include/boost/math/quadrature/detail/exp_sinh_detail.hpp
@@ -7,6 +7,10 @@
 #ifndef BOOST_MATH_QUADRATURE_DETAIL_EXP_SINH_DETAIL_HPP
 #define BOOST_MATH_QUADRATURE_DETAIL_EXP_SINH_DETAIL_HPP
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include <...>
 #include <...>
 #include <...>
@@ -541,4 +545,1458 @@ void exp_sinh_detail<Real, Policy>::init(const std::integral_constant<int, ...>&)
 }
 }
 }
-#endif
+
+#endif // BOOST_MATH_HAS_NVRTC
+
+#ifdef BOOST_MATH_ENABLE_CUDA // BOOST_MATH_ENABLE_CUDA
+
+#include <...>
+#include <...>
+#include <...>
+#include <...>
+#include <...>
+
+namespace boost {
+namespace math {
+namespace quadrature {
+namespace detail {
+
+// In the CUDA case we break these down into a series of fixed size arrays and then make a pointer to the arrays
+// We can't use a 2D array because it takes up far too much memory that is primarily wasted space
+
+__constant__ float m_abscissas_float_1[9] =
+    { 3.47876573e-23f, 5.62503650e-09f, 9.95706124e-04f, 9.67438487e-02f, 7.43599217e-01f, 4.14293205e+00f,
+      1.08086768e+02f, 4.56291316e+05f, 2.70123007e+15f, };
+
+__constant__ float m_abscissas_float_2[8] =
+    { 2.41870864e-14f, 1.02534662e-05f, 1.65637566e-02f, 3.11290799e-01f, 1.64691269e+00f, 1.49800773e+01f,
+      2.57724301e+03f, 2.24833766e+09f, };
+
+__constant__ float m_abscissas_float_3[16] =
+    { 3.24983286e-18f, 2.51095186e-11f, 3.82035773e-07f, 1.33717837e-04f, 4.80260650e-03f, 4.41526928e-02f,
+      1.83045938e-01f, 4.91960276e-01f, 1.10322609e+00f, 2.53681744e+00f, 7.39791792e+00f, 3.59560256e+01f,
+      4.36061333e+02f, 2.49501460e+04f, 1.89216933e+07f, 1.03348694e+12f, };
+
+__constant__ float m_abscissas_float_4[33] =
+    { 1.51941172e-20f, 3.70201714e-16f, 9.67598102e-13f, 4.44773051e-10f, 5.28493928e-08f, 2.19158236e-06f,
+      4.00799258e-05f, 3.88011529e-04f, 2.29325538e-03f, 9.25182629e-03f, 2.78117501e-02f, 6.67553298e-02f,
+      1.35173168e-01f, 2.41374946e-01f, 3.94194704e-01f, 6.07196731e-01f, 9.06432514e-01f, 1.34481045e+00f,
+      2.03268444e+00f, 3.21243032e+00f, 5.46310949e+00f, 1.03365745e+01f, 2.26486752e+01f, 6.03727778e+01f,
+      2.08220266e+02f, 1.00431239e+03f, 7.47843388e+03f, 9.75279951e+04f, 2.61755592e+06f, 1.77776624e+08f,
+      3.98255346e+10f, 4.13443763e+13f, 3.07708133e+17f, };
+
+__constant__ float m_abscissas_float_5[66] =
+    { 7.99409438e-22f, 2.41624595e-19f, 3.73461321e-17f, 3.19397902e-15f, 1.62042378e-13f, 5.18579386e-12f,
+      1.10520072e-10f, 1.64548212e-09f, 1.78534009e-08f,
1.46529196e-07f, 9.40168786e-07f, 4.85507733e-06f, + 2.07038029e-05f, 7.45799409e-05f, 2.31536599e-04f, 6.30580368e-04f, 1.53035449e-03f, 3.35582040e-03f, + 6.73124842e-03f, 1.24856832e-02f, 2.16245309e-02f, 3.52720523e-02f, 5.45995171e-02f, 8.07587788e-02f, + 1.14840025e-01f, 1.57867103e-01f, 2.10837078e-01f, 2.74805391e-01f, 3.51015955e-01f, 4.41077540e-01f, + 5.47194016e-01f, 6.72466825e-01f, 8.21304567e-01f, 1.00000000e+00f, 1.21757511e+00f, 1.48706221e+00f, + 1.82750536e+00f, 2.26717507e+00f, 2.84887335e+00f, 3.63893880e+00f, 4.74299876e+00f, 6.33444194e+00f, + 8.70776542e+00f, 1.23825548e+01f, 1.83151803e+01f, 2.83510579e+01f, 4.62437776e+01f, 8.00917327e+01f, + 1.48560852e+02f, 2.97989725e+02f, 6.53443372e+02f, 1.58584068e+03f, 4.31897162e+03f, 1.34084311e+04f, + 4.83003053e+04f, 2.05969943e+05f, 1.06363880e+06f, 6.82457850e+06f, 5.60117371e+07f, 6.07724622e+08f, + 9.04813016e+09f, 1.92834507e+11f, 6.17122515e+12f, 3.13089095e+14f, 2.67765347e+16f, 4.13865153e+18f, }; + +__constant__ float m_abscissas_float_6[132] = + { 1.70893932e-22f, 3.56621447e-21f, 6.19138882e-20f, 9.04299298e-19f, 1.12287188e-17f, 1.19706303e-16f, + 1.10583090e-15f, 8.92931857e-15f, 6.35404710e-14f, 4.01527389e-13f, 2.26955738e-12f, 1.15522811e-11f, + 5.32913181e-11f, 2.24130967e-10f, 8.64254491e-10f, 3.07161058e-09f, 1.01117742e-08f, 3.09775637e-08f, + 8.87004371e-08f, 2.38368096e-07f, 6.03520392e-07f, 1.44488635e-06f, 3.28212299e-06f, 7.09655821e-06f, + 1.46494407e-05f, 2.89537394e-05f, 5.49357161e-05f, 1.00313252e-04f, 1.76700203e-04f, 3.00920507e-04f, + 4.96484845e-04f, 7.95150594e-04f, 1.23845781e-03f, 1.87911525e-03f, 2.78210510e-03f, 4.02538552e-03f, + 5.70009588e-03f, 7.91020800e-03f, 1.07716137e-02f, 1.44106884e-02f, 1.89624177e-02f, 2.45682104e-02f, + 3.13735515e-02f, 3.95256605e-02f, 4.91713196e-02f, 6.04550279e-02f, 7.35176150e-02f, 8.84954195e-02f, + 1.05520113e-01f, 1.24719213e-01f, 1.46217318e-01f, 1.70138063e-01f, 1.96606781e-01f, 2.25753880e-01f, + 2.57718900e-01f, 2.92655274e-01f, 3.30735809e-01f, 3.72158929e-01f, 4.17155794e-01f, 4.65998399e-01f, + 5.19008863e-01f, 5.76570161e-01f, 6.39138643e-01f, 7.07258781e-01f, 7.81580731e-01f, 8.62881450e-01f, + 9.52090320e-01f, 1.05032052e+00f, 1.15890775e+00f, 1.27945836e+00f, 1.41390963e+00f, 1.56460576e+00f, + 1.73439430e+00f, 1.92674937e+00f, 2.14593012e+00f, 2.39718593e+00f, 2.68702407e+00f, 3.02356133e+00f, + 3.41698950e+00f, 3.88019661e+00f, 4.42960272e+00f, 5.08629455e+00f, 5.87757956e+00f, 6.83913514e+00f, + 8.01801085e+00f, 9.47686632e+00f, 1.13000199e+01f, 1.36021823e+01f, 1.65412214e+01f, 2.03370584e+01f, + 2.53000199e+01f, 3.18739815e+01f, 4.07030054e+01f, 5.27358913e+01f, 6.93929374e+01f, 9.28366010e+01f, + 1.26418926e+02f, 1.75435645e+02f, 2.48423411e+02f, 3.59440052e+02f, 5.32165336e+02f, 8.07455844e+02f, + 1.25762341e+03f, 2.01416017e+03f, 3.32313676e+03f, 5.65930306e+03f, 9.96877263e+03f, 1.82030939e+04f, + 3.45378531e+04f, 6.82619916e+04f, 1.40913380e+05f, 3.04680844e+05f, 6.92095957e+05f, 1.65694484e+06f, + 4.19519229e+06f, 1.12739016e+07f, 3.22814282e+07f, 9.88946136e+07f, 3.25562103e+08f, 1.15706659e+09f, + 4.46167708e+09f, 1.87647826e+10f, 8.65629909e+10f, 4.40614549e+11f, 2.49049013e+12f, 1.57380011e+13f, + 1.11990629e+14f, 9.04297390e+14f, 8.35377903e+15f, 8.90573552e+16f, 1.10582857e+18f, 1.61514650e+19f, }; + +__constant__ float m_abscissas_float_7[263] = + { 7.75845008e-23f, 3.71846701e-22f, 1.69833677e-21f, 7.40284853e-21f, 3.08399399e-20f, 1.22962599e-19f, + 4.69855182e-19f, 1.72288020e-18f, 6.07012059e-18f, 
2.05742924e-17f, 6.71669437e-17f, 2.11441966e-16f, + 6.42566550e-16f, 1.88715605e-15f, 5.36188198e-15f, 1.47533056e-14f, 3.93507835e-14f, 1.01841667e-13f, + 2.55981752e-13f, 6.25453236e-13f, 1.48683211e-12f, 3.44173601e-12f, 7.76421789e-12f, 1.70831312e-11f, + 3.66877698e-11f, 7.69632540e-11f, 1.57822184e-10f, 3.16577320e-10f, 6.21604166e-10f, 1.19551931e-09f, + 2.25364361e-09f, 4.16647469e-09f, 7.55905964e-09f, 1.34658870e-08f, 2.35675936e-08f, 4.05458117e-08f, + 6.86052525e-08f, 1.14227960e-07f, 1.87243781e-07f, 3.02323521e-07f, 4.81026747e-07f, 7.54564302e-07f, + 1.16746531e-06f, 1.78236867e-06f, 2.68618781e-06f, 3.99792342e-06f, 5.87841837e-06f, 8.54236163e-06f, + 1.22728487e-05f, 1.74387947e-05f, 2.45154696e-05f, 3.41083807e-05f, 4.69806683e-05f, 6.40841007e-05f, + 8.65936597e-05f, 1.15945600e-04f, 1.53878746e-04f, 2.02478652e-04f, 2.64224143e-04f, 3.42035594e-04f, + 4.39324211e-04f, 5.60041454e-04f, 7.08727668e-04f, 8.90558896e-04f, 1.11139085e-03f, 1.37779898e-03f, + 1.69711358e-03f, 2.07744903e-03f, 2.52772622e-03f, 3.05768742e-03f, 3.67790298e-03f, 4.39976940e-03f, + 5.23549846e-03f, 6.19809738e-03f, 7.30134015e-03f, 8.55973022e-03f, 9.98845520e-03f, 1.16033342e-02f, + 1.34207587e-02f, 1.54576276e-02f, 1.77312787e-02f, 2.02594158e-02f, 2.30600348e-02f, 2.61513493e-02f, + 2.95517158e-02f, 3.32795626e-02f, 3.73533204e-02f, 4.17913590e-02f, 4.66119283e-02f, 5.18331072e-02f, + 5.74727595e-02f, 6.35484986e-02f, 7.00776615e-02f, 7.70772927e-02f, 8.45641386e-02f, 9.25546518e-02f, + 1.01065008e-01f, 1.10111132e-01f, 1.19708739e-01f, 1.29873379e-01f, 1.40620505e-01f, 1.51965539e-01f, + 1.63923958e-01f, 1.76511391e-01f, 1.89743720e-01f, 2.03637197e-01f, 2.18208574e-01f, 2.33475238e-01f, + 2.49455360e-01f, 2.66168055e-01f, 2.83633553e-01f, 3.01873381e-01f, 3.20910560e-01f, 3.40769809e-01f, + 3.61477772e-01f, 3.83063247e-01f, 4.05557445e-01f, 4.28994258e-01f, 4.53410546e-01f, 4.78846448e-01f, + 5.05345717e-01f, 5.32956079e-01f, 5.61729623e-01f, 5.91723220e-01f, 6.22998983e-01f, 6.55624768e-01f, + 6.89674714e-01f, 7.25229845e-01f, 7.62378724e-01f, 8.01218171e-01f, 8.41854062e-01f, 8.84402205e-01f, + 9.28989312e-01f, 9.75754080e-01f, 1.02484839e+00f, 1.07643865e+00f, 1.13070727e+00f, 1.18785434e+00f, + 1.24809950e+00f, 1.31168403e+00f, 1.37887320e+00f, 1.44995892e+00f, 1.52526270e+00f, 1.60513906e+00f, + 1.68997931e+00f, 1.78021589e+00f, 1.87632722e+00f, 1.97884333e+00f, 2.08835213e+00f, 2.20550671e+00f, + 2.33103353e+00f, 2.46574193e+00f, 2.61053497e+00f, 2.76642183e+00f, 2.93453226e+00f, 3.11613304e+00f, + 3.31264716e+00f, 3.52567596e+00f, 3.75702486e+00f, 4.00873326e+00f, 4.28310945e+00f, 4.58277134e+00f, + 4.91069419e+00f, 5.27026666e+00f, 5.66535674e+00f, 6.10038953e+00f, 6.58043928e+00f, 7.11133842e+00f, + 7.69980735e+00f, 8.35360902e+00f, 9.08173387e+00f, 9.89462150e+00f, 1.08044272e+01f, 1.18253437e+01f, + 1.29739897e+01f, 1.42698826e+01f, 1.57360130e+01f, 1.73995473e+01f, 1.92926887e+01f, 2.14537359e+01f, + 2.39283915e+01f, 2.67713817e+01f, 3.00484719e+01f, 3.38389827e+01f, 3.82389447e+01f, 4.33650689e+01f, + 4.93597649e+01f, 5.63975118e+01f, 6.46929803e+01f, 7.45114359e+01f, 8.61821250e+01f, 1.00115581e+02f, + 1.16826112e+02f, 1.36961158e+02f, 1.61339834e+02f, 1.91003781e+02f, 2.27284639e+02f, 2.71894067e+02f, + 3.27044548e+02f, 3.95612465e+02f, 4.81359585e+02f, 5.89235756e+02f, 7.25795284e+02f, 8.99773468e+02f, + 1.12289036e+03f, 1.41097920e+03f, 1.78558211e+03f, 2.27622329e+03f, 2.92367233e+03f, 3.78466551e+03f, + 4.93879227e+03f, 6.49862329e+03f, 8.62473434e+03f, 1.15481896e+04f, 
1.56044945e+04f, 2.12853507e+04f, + 2.93183077e+04f, 4.07905708e+04f, 5.73434125e+04f, 8.14806753e+04f, 1.17063646e+05f, 1.70113785e+05f, + 2.50129854e+05f, 3.72274789e+05f, 5.61051155e+05f, 8.56556497e+05f, 1.32526810e+06f, 2.07888648e+06f, + 3.30771485e+06f, 5.34063130e+06f, 8.75442405e+06f, 1.45761434e+07f, 2.46634599e+07f, 4.24311457e+07f, + 7.42617251e+07f, 1.32291588e+08f, 2.40011058e+08f, 4.43725882e+08f, 8.36456588e+08f, 1.60874083e+09f, + 3.15878598e+09f, 6.33624483e+09f, 1.29932136e+10f, 2.72570398e+10f, 5.85372779e+10f, 1.28795973e+11f, + 2.90551047e+11f, 6.72570892e+11f, 1.59884056e+12f, 3.90652847e+12f, 9.81916374e+12f, 2.54124546e+13f, + 6.77814197e+13f, 1.86501681e+14f, 5.29897885e+14f, 1.55625904e+15f, 4.72943011e+15f, 1.48882761e+16f, + 4.86043448e+16f, 1.64741373e+17f, 5.80423410e+17f, 2.12831536e+18f, 8.13255421e+18f, }; + +__constant__ float m_abscissas_float_8[527] = + { 5.20331508e-23f, 1.15324162e-22f, 2.52466875e-22f, 5.46028730e-22f, 1.16690465e-21f, 2.46458927e-21f, + 5.14543768e-21f, 1.06205431e-20f, 2.16767715e-20f, 4.37564009e-20f, 8.73699691e-20f, 1.72595588e-19f, + 3.37377643e-19f, 6.52669145e-19f, 1.24976973e-18f, 2.36916845e-18f, 4.44691383e-18f, 8.26580373e-18f, + 1.52174118e-17f, 2.77517606e-17f, 5.01415830e-17f, 8.97689232e-17f, 1.59270821e-16f, 2.80084735e-16f, + 4.88253693e-16f, 8.43846463e-16f, 1.44610939e-15f, 2.45762595e-15f, 4.14251017e-15f, 6.92627770e-15f, + 1.14889208e-14f, 1.89084205e-14f, 3.08802476e-14f, 5.00504297e-14f, 8.05169965e-14f, 1.28579121e-13f, + 2.03847833e-13f, 3.20880532e-13f, 5.01568631e-13f, 7.78600100e-13f, 1.20044498e-12f, 1.83848331e-12f, + 2.79712543e-12f, 4.22808302e-12f, 6.35035779e-12f, 9.47805307e-12f, 1.40588174e-11f, 2.07266430e-11f, + 3.03739182e-11f, 4.42491437e-11f, 6.40886341e-11f, 9.22929507e-11f, 1.32161843e-10f, 1.88205259e-10f, + 2.66552657e-10f, 3.75488615e-10f, 5.26149742e-10f, 7.33426418e-10f, 1.01712318e-09f, 1.40344387e-09f, + 1.92688222e-09f, 2.63261606e-09f, 3.57952343e-09f, 4.84396276e-09f, 6.52448685e-09f, 8.74769197e-09f, + 1.16754399e-08f, 1.55137320e-08f, 2.05235608e-08f, 2.70341184e-08f, 3.54587968e-08f, 4.63144836e-08f, + 6.02447248e-08f, 7.80474059e-08f, 1.00707687e-07f, 1.29437018e-07f, 1.65719157e-07f, 2.11364220e-07f, + 2.68571894e-07f, 3.40005066e-07f, 4.28875221e-07f, 5.39041105e-07f, 6.75122241e-07f, 8.42629031e-07f, + 1.04811127e-06f, 1.29932703e-06f, 1.60543396e-06f, 1.97720518e-06f, 2.42727196e-06f, 2.97039558e-06f, + 3.62377065e-06f, 4.40736236e-06f, 5.34428013e-06f, 6.46118994e-06f, 7.78876789e-06f, 9.36219733e-06f, + 1.12217116e-05f, 1.34131848e-05f, 1.59887725e-05f, 1.90076038e-05f, 2.25365270e-05f, 2.66509096e-05f, + 3.14354940e-05f, 3.69853096e-05f, 4.34066412e-05f, 5.08180543e-05f, 5.93514765e-05f, 6.91533342e-05f, + 8.03857429e-05f, 9.32277499e-05f, 1.07876627e-04f, 1.24549208e-04f, 1.43483273e-04f, 1.64938971e-04f, + 1.89200275e-04f, 2.16576471e-04f, 2.47403671e-04f, 2.82046341e-04f, 3.20898851e-04f, 3.64387021e-04f, + 4.12969671e-04f, 4.67140163e-04f, 5.27427922e-04f, 5.94399942e-04f, 6.68662248e-04f, 7.50861330e-04f, + 8.41685517e-04f, 9.41866302e-04f, 1.05217960e-03f, 1.17344692e-03f, 1.30653650e-03f, 1.45236427e-03f, + 1.61189482e-03f, 1.78614219e-03f, 1.97617055e-03f, 2.18309485e-03f, 2.40808123e-03f, 2.65234740e-03f, + 2.91716284e-03f, 3.20384886e-03f, 3.51377855e-03f, 3.84837661e-03f, 4.20911898e-03f, 4.59753235e-03f, + 5.01519359e-03f, 5.46372894e-03f, 5.94481312e-03f, 6.46016832e-03f, 7.01156301e-03f, 7.60081065e-03f, + 8.22976829e-03f, 8.90033499e-03f, 
9.61445021e-03f, 1.03740920e-02f, 1.11812753e-02f, 1.20380497e-02f, + 1.29464978e-02f, 1.39087327e-02f, 1.49268962e-02f, 1.60031562e-02f, 1.71397050e-02f, 1.83387564e-02f, + 1.96025436e-02f, 2.09333170e-02f, 2.23333419e-02f, 2.38048956e-02f, 2.53502659e-02f, 2.69717481e-02f, + 2.86716433e-02f, 3.04522558e-02f, 3.23158911e-02f, 3.42648538e-02f, 3.63014456e-02f, 3.84279634e-02f, + 4.06466974e-02f, 4.29599296e-02f, 4.53699317e-02f, 4.78789641e-02f, 5.04892744e-02f, 5.32030959e-02f, + 5.60226468e-02f, 5.89501290e-02f, 6.19877276e-02f, 6.51376099e-02f, 6.84019251e-02f, 7.17828036e-02f, + 7.52823576e-02f, 7.89026802e-02f, 8.26458461e-02f, 8.65139116e-02f, 9.05089155e-02f, 9.46328794e-02f, + 9.88878087e-02f, 1.03275694e-01f, 1.07798510e-01f, 1.12458223e-01f, 1.17256783e-01f, 1.22196135e-01f, + 1.27278214e-01f, 1.32504950e-01f, 1.37878272e-01f, 1.43400107e-01f, 1.49072382e-01f, 1.54897032e-01f, + 1.60875997e-01f, 1.67011231e-01f, 1.73304700e-01f, 1.79758387e-01f, 1.86374297e-01f, 1.93154462e-01f, + 2.00100939e-01f, 2.07215821e-01f, 2.14501238e-01f, 2.21959362e-01f, 2.29592410e-01f, 2.37402653e-01f, + 2.45392415e-01f, 2.53564085e-01f, 2.61920117e-01f, 2.70463037e-01f, 2.79195450e-01f, 2.88120044e-01f, + 2.97239599e-01f, 3.06556989e-01f, 3.16075193e-01f, 3.25797297e-01f, 3.35726506e-01f, 3.45866147e-01f, + 3.56219679e-01f, 3.66790698e-01f, 3.77582948e-01f, 3.88600328e-01f, 3.99846898e-01f, 4.11326892e-01f, + 4.23044723e-01f, 4.35004995e-01f, 4.47212512e-01f, 4.59672288e-01f, 4.72389556e-01f, 4.85369781e-01f, + 4.98618671e-01f, 5.12142186e-01f, 5.25946554e-01f, 5.40038281e-01f, 5.54424165e-01f, 5.69111309e-01f, + 5.84107138e-01f, 5.99419409e-01f, 6.15056232e-01f, 6.31026081e-01f, 6.47337815e-01f, 6.64000696e-01f, + 6.81024405e-01f, 6.98419060e-01f, 7.16195243e-01f, 7.34364016e-01f, 7.52936944e-01f, 7.71926120e-01f, + 7.91344191e-01f, 8.11204381e-01f, 8.31520518e-01f, 8.52307069e-01f, 8.73579162e-01f, 8.95352625e-01f, + 9.17644013e-01f, 9.40470650e-01f, 9.63850664e-01f, 9.87803022e-01f, 1.01234758e+00f, 1.03750512e+00f, + 1.06329740e+00f, 1.08974721e+00f, 1.11687839e+00f, 1.14471595e+00f, 1.17328606e+00f, 1.20261614e+00f, + 1.23273496e+00f, 1.26367264e+00f, 1.29546076e+00f, 1.32813247e+00f, 1.36172249e+00f, 1.39626730e+00f, + 1.43180514e+00f, 1.46837616e+00f, 1.50602252e+00f, 1.54478848e+00f, 1.58472055e+00f, 1.62586760e+00f, + 1.66828098e+00f, 1.71201469e+00f, 1.75712551e+00f, 1.80367319e+00f, 1.85172058e+00f, 1.90133388e+00f, + 1.95258276e+00f, 2.00554062e+00f, 2.06028484e+00f, 2.11689693e+00f, 2.17546288e+00f, 2.23607339e+00f, + 2.29882418e+00f, 2.36381627e+00f, 2.43115639e+00f, 2.50095725e+00f, 2.57333803e+00f, 2.64842468e+00f, + 2.72635049e+00f, 2.80725648e+00f, 2.89129193e+00f, 2.97861498e+00f, 3.06939317e+00f, 3.16380413e+00f, + 3.26203621e+00f, 3.36428929e+00f, 3.47077553e+00f, 3.58172026e+00f, 3.69736291e+00f, 3.81795798e+00f, + 3.94377618e+00f, 4.07510558e+00f, 4.21225285e+00f, 4.35554468e+00f, 4.50532923e+00f, 4.66197775e+00f, + 4.82588634e+00f, 4.99747780e+00f, 5.17720373e+00f, 5.36554672e+00f, 5.56302277e+00f, 5.77018396e+00f, + 5.98762126e+00f, 6.21596768e+00f, 6.45590164e+00f, 6.70815069e+00f, 6.97349551e+00f, 7.25277437e+00f, + 7.54688785e+00f, 7.85680417e+00f, 8.18356491e+00f, 8.52829128e+00f, 8.89219104e+00f, 9.27656603e+00f, + 9.68282047e+00f, 1.01124700e+01f, 1.05671518e+01f, 1.10486353e+01f, 1.15588347e+01f, 1.20998217e+01f, + 1.26738407e+01f, 1.32833247e+01f, 1.39309131e+01f, 1.46194716e+01f, 1.53521138e+01f, 1.61322255e+01f, + 1.69634913e+01f, 1.78499242e+01f, 1.87958987e+01f, 
1.98061868e+01f, 2.08859991e+01f, 2.20410294e+01f, + 2.32775056e+01f, 2.46022448e+01f, 2.60227166e+01f, 2.75471124e+01f, 2.91844234e+01f, 3.09445281e+01f, + 3.28382897e+01f, 3.48776660e+01f, 3.70758319e+01f, 3.94473180e+01f, 4.20081658e+01f, 4.47761023e+01f, + 4.77707378e+01f, 5.10137879e+01f, 5.45293247e+01f, 5.83440613e+01f, 6.24876734e+01f, 6.69931639e+01f, + 7.18972765e+01f, 7.72409663e+01f, 8.30699343e+01f, 8.94352364e+01f, 9.63939781e+01f, 1.04010108e+02f, + 1.12355322e+02f, 1.21510104e+02f, 1.31564914e+02f, 1.42621552e+02f, 1.54794728e+02f, 1.68213867e+02f, + 1.83025185e+02f, 1.99394097e+02f, 2.17507985e+02f, 2.37579409e+02f, 2.59849828e+02f, 2.84593917e+02f, + 3.12124587e+02f, 3.42798827e+02f, 3.77024517e+02f, 4.15268384e+02f, 4.58065302e+02f, 5.06029199e+02f, + 5.59865843e+02f, 6.20387872e+02f, 6.88532497e+02f, 7.65382367e+02f, 8.52190227e+02f, 9.50408087e+02f, + 1.06172182e+03f, 1.18809220e+03f, 1.33180384e+03f, 1.49552334e+03f, 1.68236894e+03f, 1.89599367e+03f, + 2.14068513e+03f, 2.42148533e+03f, 2.74433485e+03f, 3.11624675e+03f, 3.54551666e+03f, 4.04197722e+03f, + 4.61730674e+03f, 5.28540457e+03f, 6.06284853e+03f, 6.96945350e+03f, 8.02895513e+03f, 9.26984864e+03f, + 1.07264200e+04f, 1.24400169e+04f, 1.44606187e+04f, 1.68487805e+04f, 1.96780458e+04f, 2.30379493e+04f, + 2.70377620e+04f, 3.18111749e+04f, 3.75221715e+04f, 4.43724093e+04f, 5.26105241e+04f, 6.25438881e+04f, + 7.45535092e+04f, 8.91129656e+04f, 1.06812532e+05f, 1.28390012e+05f, 1.54770253e+05f, 1.87115940e+05f, + 2.26893075e+05f, 2.75955654e+05f, 3.36655497e+05f, 4.11985149e+05f, 5.05764405e+05f, 6.22884544e+05f, + 7.69629183e+05f, 9.54097173e+05f, 1.18676186e+06f, 1.48121324e+06f, 1.85514609e+06f, 2.33168052e+06f, + 2.94113264e+06f, 3.72339780e+06f, 4.73116974e+06f, 6.03430539e+06f, 7.72576515e+06f, 9.92972861e+06f, + 1.28127257e+07f, 1.65989637e+07f, 2.15915179e+07f, 2.82017465e+07f, 3.69902945e+07f, 4.87244884e+07f, + 6.44590226e+07f, 8.56498776e+07f, 1.14315868e+08f, 1.53268759e+08f, 2.06442545e+08f, 2.79366798e+08f, + 3.79850300e+08f, 5.18973079e+08f, 7.12532948e+08f, 9.83165083e+08f, 1.36346329e+09f, 1.90059962e+09f, + 2.66319659e+09f, 3.75160395e+09f, 5.31334782e+09f, 7.56648043e+09f, 1.08350637e+10f, 1.56033907e+10f, + 2.25993074e+10f, 3.29229832e+10f, 4.82470799e+10f, 7.11297379e+10f, 1.05506900e+11f, 1.57471442e+11f, + 2.36513804e+11f, 3.57509889e+11f, 5.43926613e+11f, 8.33024431e+11f, 1.28435637e+12f, 1.99374510e+12f, + 3.11642465e+12f, 4.90561997e+12f, 7.77731247e+12f, 1.24197380e+13f, 1.99798484e+13f, 3.23831600e+13f, + 5.28864904e+13f, 8.70403770e+13f, 1.44377694e+14f, 2.41399528e+14f, 4.06896744e+14f, 6.91510621e+14f, + 1.18504970e+15f, 2.04811559e+15f, 3.57034809e+15f, 6.27861398e+15f, 1.11397125e+16f, 1.99435267e+16f, + 3.60337498e+16f, 6.57141972e+16f, 1.20980371e+17f, 2.24875057e+17f, 4.22089025e+17f, 8.00147402e+17f, + 1.53216987e+18f, 2.96403754e+18f, 5.79389087e+18f, 1.14455803e+19f, 2.28537992e+19f, }; + +__constant__ float* m_abscissas_float[8] = { + m_abscissas_float_1, + m_abscissas_float_2, + m_abscissas_float_3, + m_abscissas_float_4, + m_abscissas_float_5, + m_abscissas_float_6, + m_abscissas_float_7, + m_abscissas_float_8, +}; + +__constant__ float m_weights_float_1[9] = + { 1.79979618e-21f, 1.07218106e-07f, 7.05786060e-03f, 2.72310168e-01f, 1.18863515e+00f, 8.77655464e+00f, + 5.33879432e+02f, 5.98892409e+06f, 9.60751551e+16f, }; + +__constant__ float m_weights_float_2[8] = + { 7.59287827e-13f, 1.18886775e-04f, 7.27332179e-02f, 6.09156795e-01f, 2.71431234e+00f, 4.68800805e+01f, + 
2.06437304e+04f, 4.85431236e+10f, }; + +__constant__ float m_weights_float_3[16] = + { 1.30963564e-16f, 6.14135316e-10f, 5.67743391e-06f, 1.21108690e-03f, 2.67259824e-02f, 1.54234107e-01f, + 4.23412860e-01f, 8.47913037e-01f, 1.73632925e+00f, 4.63203354e+00f, 1.88206826e+01f, 1.40643917e+02f, + 2.73736946e+03f, 2.55633252e+05f, 3.18438602e+08f, 2.86363931e+13f, }; + +__constant__ float m_weights_float_4[33] = + { 6.93769555e-19f, 1.31670336e-14f, 2.68107110e-11f, 9.60294960e-09f, 8.89417585e-07f, 2.87650015e-05f, + 4.10649371e-04f, 3.10797444e-03f, 1.43958814e-02f, 4.56980985e-02f, 1.08787148e-01f, 2.08910486e-01f, + 3.43887471e-01f, 5.11338439e-01f, 7.19769211e-01f, 1.00073403e+00f, 1.42660267e+00f, 2.14966467e+00f, + 3.50341221e+00f, 6.28632057e+00f, 1.26369961e+01f, 2.90949180e+01f, 7.91163114e+01f, 2.65103292e+02f, + 1.15872311e+03f, 7.11886439e+03f, 6.77324248e+04f, 1.13081650e+06f, 3.88995005e+07f, 3.38857764e+09f, + 9.74063570e+11f, 1.29789430e+15f, 1.24001927e+19f, }; + +__constant__ float m_weights_float_5[66] = + { 3.88541434e-20f, 1.03646493e-17f, 1.41388360e-15f, 1.06725054e-13f, 4.77908002e-12f, 1.34999345e-10f, + 2.53970414e-09f, 3.33804787e-08f, 3.19755978e-07f, 2.31724882e-06f, 1.31302324e-05f, 5.98917639e-05f, + 2.25650360e-04f, 7.18397083e-04f, 1.97196929e-03f, 4.75106406e-03f, 1.02072514e-02f, 1.98317011e-02f, + 3.52844239e-02f, 5.81350403e-02f, 8.95955146e-02f, 1.30335749e-01f, 1.80445384e-01f, 2.39557131e-01f, + 3.07102681e-01f, 3.82648608e-01f, 4.66260909e-01f, 5.58867257e-01f, 6.62616429e-01f, 7.81267733e-01f, + 9.20677638e-01f, 1.08949034e+00f, 1.30019425e+00f, 1.57079633e+00f, 1.92752387e+00f, 2.40924883e+00f, + 3.07485695e+00f, 4.01578082e+00f, 5.37784753e+00f, 7.40045071e+00f, 1.04890228e+01f, 1.53538346e+01f, + 2.32861156e+01f, 3.67307348e+01f, 6.05296516e+01f, 1.04761593e+02f, 1.91598840e+02f, 3.72918009e+02f, + 7.78738763e+02f, 1.76101294e+03f, 4.35837629e+03f, 1.19484066e+04f, 3.67841605e+04f, 1.29157756e+05f, + 5.26424122e+05f, 2.54082527e+06f, 1.48545930e+07f, 1.07925566e+08f, 1.00317513e+09f, 1.23283860e+10f, + 2.07922173e+11f, 5.01997049e+12f, 1.82006578e+14f, 1.04617001e+16f, 1.01373023e+18f, 1.77530238e+20f, }; + +__constant__ float m_weights_float_6[132] = + { 8.56958007e-21f, 1.68000718e-19f, 2.74008750e-18f, 3.75978801e-17f, 4.38589881e-16f, 4.39263787e-15f, + 3.81223973e-14f, 2.89198757e-13f, 1.93338859e-12f, 1.14783389e-11f, 6.09544349e-11f, 2.91499607e-10f, + 1.26339559e-09f, 4.99234840e-09f, 1.80872790e-08f, 6.03998541e-08f, 1.86829770e-07f, 5.37807971e-07f, + 1.44704121e-06f, 3.65421571e-06f, 8.69454276e-06f, 1.95621880e-05f, 4.17628758e-05f, 8.48713297e-05f, + 1.64680159e-04f, 3.05960283e-04f, 5.45748909e-04f, 9.36950301e-04f, 1.55189915e-03f, 2.48542560e-03f, + 3.85690505e-03f, 5.81079770e-03f, 8.51529070e-03f, 1.21588421e-02f, 1.69446644e-02f, 2.30834400e-02f, + 3.07847946e-02f, 4.02482241e-02f, 5.16542634e-02f, 6.51566792e-02f, 8.08763802e-02f, 9.88975757e-02f, + 1.19266512e-01f, 1.41992893e-01f, 1.67053901e-01f, 1.94400532e-01f, 2.23965873e-01f, 2.55674859e-01f, + 2.89455038e-01f, 3.25247905e-01f, 3.63020457e-01f, 4.02776696e-01f, 4.44568958e-01f, 4.88509042e-01f, + 5.34779290e-01f, 5.83643845e-01f, 6.35460497e-01f, 6.90693630e-01f, 7.49928915e-01f, 8.13890578e-01f, + 8.83462209e-01f, 9.59712352e-01f, 1.04392634e+00f, 1.13764623e+00f, 1.24272128e+00f, 1.36137177e+00f, + 1.49627028e+00f, 1.65064527e+00f, 1.82841374e+00f, 2.03435175e+00f, 2.27431458e+00f, 2.55552245e+00f, + 2.88693336e+00f, 3.27973254e+00f, 3.74797919e+00f, 4.30946679e+00f, 
4.98687594e+00f, 5.80933099e+00f, + 6.81451887e+00f, 8.05159726e+00f, 9.58522167e+00f, 1.15011733e+01f, 1.39143002e+01f, 1.69798351e+01f, + 2.09096993e+01f, 2.59962450e+01f, 3.26472377e+01f, 4.14380231e+01f, 5.31903193e+01f, 6.90928164e+01f, + 9.08883744e+01f, 1.21168895e+02f, 1.63847041e+02f, 2.24923217e+02f, 3.13754154e+02f, 4.45189215e+02f, + 6.43236850e+02f, 9.47484116e+02f, 1.42457583e+03f, 2.18920236e+03f, 3.44338342e+03f, 5.55184130e+03f, + 9.19045432e+03f, 1.56468513e+04f, 2.74471462e+04f, 4.97037777e+04f, 9.31107740e+04f, 1.80835335e+05f, + 3.64968793e+05f, 7.67360053e+05f, 1.68525439e+06f, 3.87686515e+06f, 9.37022570e+06f, 2.38705733e+07f, + 6.43128750e+07f, 1.83920179e+08f, 5.60444636e+08f, 1.82722217e+09f, 6.40182180e+09f, 2.42153053e+10f, + 9.93804949e+10f, 4.44863150e+11f, 2.18425069e+12f, 1.18337660e+13f, 7.11948688e+13f, 4.78870731e+14f, + 3.62710215e+15f, 3.11747341e+16f, 3.06542975e+17f, 3.47854955e+18f, 4.59768243e+19f, 7.14806140e+20f, }; + +__constant__ float m_weights_float_7[263] = + { 3.95175890e-21f, 1.83575349e-20f, 8.12661397e-20f, 3.43336935e-19f, 1.38634563e-18f, 5.35757029e-18f, + 1.98424944e-17f, 7.05221126e-17f, 2.40827550e-16f, 7.91175869e-16f, 2.50347754e-15f, 7.63871031e-15f, + 2.25003103e-14f, 6.40502166e-14f, 1.76389749e-13f, 4.70424252e-13f, 1.21618334e-12f, 3.05082685e-12f, + 7.43273471e-12f, 1.76028616e-11f, 4.05602375e-11f, 9.10055013e-11f, 1.98994391e-10f, 4.24390078e-10f, + 8.83436580e-10f, 1.79636925e-09f, 3.57059250e-09f, 6.94247187e-09f, 1.32133371e-08f, 2.46332536e-08f, + 4.50110843e-08f, 8.06630537e-08f, 1.41856144e-07f, 2.44958654e-07f, 4.15579069e-07f, 6.93056106e-07f, + 1.13675616e-06f, 1.83473665e-06f, 2.91544023e-06f, 4.56318858e-06f, 7.03833675e-06f, 1.07030190e-05f, + 1.60534529e-05f, 2.37597559e-05f, 3.47141604e-05f, 5.00883685e-05f, 7.14005734e-05f, 1.00592372e-04f, + 1.40115414e-04f, 1.93027181e-04f, 2.63094779e-04f, 3.54905080e-04f, 4.73978972e-04f, 6.26886955e-04f, + 8.21362793e-04f, 1.06641153e-03f, 1.37240787e-03f, 1.75118071e-03f, 2.21607971e-03f, 2.78201983e-03f, + 3.46550010e-03f, 4.28459361e-03f, 5.25890609e-03f, 6.40950150e-03f, 7.75879384e-03f, 9.33040551e-03f, + 1.11489935e-02f, 1.32400455e-02f, 1.56296499e-02f, 1.83442433e-02f, 2.14103400e-02f, 2.48542509e-02f, + 2.87017958e-02f, 3.29780164e-02f, 3.77068968e-02f, 4.29110964e-02f, 4.86117029e-02f, 5.48280093e-02f, + 6.15773214e-02f, 6.88747982e-02f, 7.67333308e-02f, 8.51634602e-02f, 9.41733378e-02f, 1.03768728e-01f, + 1.13953051e-01f, 1.24727473e-01f, 1.36091031e-01f, 1.48040798e-01f, 1.60572082e-01f, 1.73678660e-01f, + 1.87353038e-01f, 2.01586736e-01f, 2.16370598e-01f, 2.31695113e-01f, 2.47550758e-01f, 2.63928342e-01f, + 2.80819365e-01f, 2.98216379e-01f, 3.16113348e-01f, 3.34506011e-01f, 3.53392244e-01f, 3.72772414e-01f, + 3.92649735e-01f, 4.13030618e-01f, 4.33925021e-01f, 4.55346789e-01f, 4.77314001e-01f, 4.99849320e-01f, + 5.22980337e-01f, 5.46739932e-01f, 5.71166640e-01f, 5.96305036e-01f, 6.22206131e-01f, 6.48927802e-01f, + 6.76535247e-01f, 7.05101473e-01f, 7.34707835e-01f, 7.65444619e-01f, 7.97411688e-01f, 8.30719192e-01f, + 8.65488366e-01f, 9.01852407e-01f, 9.39957463e-01f, 9.79963735e-01f, 1.02204672e+00f, 1.06639858e+00f, + 1.11322974e+00f, 1.16277062e+00f, 1.21527359e+00f, 1.27101525e+00f, 1.33029891e+00f, 1.39345744e+00f, + 1.46085648e+00f, 1.53289803e+00f, 1.61002461e+00f, 1.69272386e+00f, 1.78153384e+00f, 1.87704900e+00f, + 1.97992701e+00f, 2.09089644e+00f, 2.21076567e+00f, 2.34043290e+00f, 2.48089770e+00f, 2.63327413e+00f, + 2.79880590e+00f, 2.97888368e+00f, 
3.17506505e+00f, 3.38909744e+00f, 3.62294469e+00f, 3.87881764e+00f, + 4.15920968e+00f, 4.46693789e+00f, 4.80519096e+00f, 5.17758497e+00f, 5.58822853e+00f, 6.04179895e+00f, + 6.54363157e+00f, 7.09982467e+00f, 7.71736306e+00f, 8.40426388e+00f, 9.16974906e+00f, 1.00244499e+01f, + 1.09806502e+01f, 1.20525758e+01f, 1.32567410e+01f, 1.46123627e+01f, 1.61418586e+01f, 1.78714466e+01f, + 1.98318690e+01f, 2.20592694e+01f, 2.45962577e+01f, 2.74932084e+01f, 3.08098460e+01f, 3.46171893e+01f, + 3.89999428e+01f, 4.40594471e+01f, 4.99173320e+01f, 5.67200545e+01f, 6.46445583e+01f, 7.39053537e+01f, + 8.47634121e+01f, 9.75373786e+01f, 1.12617765e+02f, 1.30484989e+02f, 1.51732386e+02f, 1.77095712e+02f, + 2.07491096e+02f, 2.44064119e+02f, 2.88253545e+02f, 3.41874461e+02f, 4.07227291e+02f, 4.87241400e+02f, + 5.85665251e+02f, 7.07319497e+02f, 8.58435639e+02f, 1.04711167e+03f, 1.28392853e+03f, 1.58278901e+03f, + 1.96206607e+03f, 2.44618436e+03f, 3.06781187e+03f, 3.87091688e+03f, 4.91505977e+03f, 6.28145970e+03f, + 8.08162997e+03f, 1.04697579e+04f, 1.36605846e+04f, 1.79554230e+04f, 2.37803156e+04f, 3.17424455e+04f, + 4.27142204e+04f, 5.79596727e+04f, 7.93261335e+04f, 1.09537503e+05f, 1.52647130e+05f, 2.14743829e+05f, + 3.05063335e+05f, 4.37755687e+05f, 6.34724899e+05f, 9.30240305e+05f, 1.37850753e+06f, 2.06623977e+06f, + 3.13377596e+06f, 4.81098405e+06f, 7.47905793e+06f, 1.17782423e+07f, 1.87980927e+07f, 3.04180655e+07f, + 4.99257437e+07f, 8.31551852e+07f, 1.40614107e+08f, 2.41519712e+08f, 4.21576502e+08f, 7.48209440e+08f, + 1.35089892e+09f, 2.48263348e+09f, 4.64662007e+09f, 8.86235204e+09f, 1.72348930e+10f, 3.41967381e+10f, + 6.92714904e+10f, 1.43352142e+11f, 3.03269524e+11f, 6.56345865e+11f, 1.45422052e+12f, 3.30099910e+12f, + 7.68267630e+12f, 1.83474885e+13f, 4.49980389e+13f, 1.13430702e+14f, 2.94148450e+14f, 7.85402504e+14f, + 2.16127995e+15f, 6.13534293e+15f, 1.79847736e+16f, 5.44944507e+16f, 1.70858922e+17f, 5.54922744e+17f, + 1.86905990e+18f, 6.53599225e+18f, 2.37582887e+19f, 8.98810682e+19f, 3.54341330e+20f, }; + +__constant__ float m_weights_float_8[527] = + { 2.67108015e-21f, 5.82833463e-21f, 1.25616316e-20f, 2.67469785e-20f, 5.62745845e-20f, 1.17014394e-19f, + 2.40511019e-19f, 4.88739481e-19f, 9.82072303e-19f, 1.95168062e-18f, 3.83661097e-18f, 7.46163208e-18f, + 1.43594942e-17f, 2.73485792e-17f, 5.15573612e-17f, 9.62223075e-17f, 1.77810682e-16f, 3.25389618e-16f, + 5.89765054e-16f, 1.05888451e-15f, 1.88354538e-15f, 3.31989417e-15f, 5.79902273e-15f, 1.00398818e-14f, + 1.72308010e-14f, 2.93186753e-14f, 4.94655967e-14f, 8.27635884e-14f, 1.37343706e-13f, 2.26082511e-13f, + 3.69205736e-13f, 5.98228147e-13f, 9.61866975e-13f, 1.53484658e-12f, 2.43090464e-12f, 3.82185577e-12f, + 5.96531965e-12f, 9.24474797e-12f, 1.42267754e-11f, 2.17427910e-11f, 3.30041201e-11f, 4.97635091e-11f, + 7.45399354e-11f, 1.10929412e-10f, 1.64031748e-10f, 2.41032586e-10f, 3.51991946e-10f, 5.10905560e-10f, + 7.37124150e-10f, 1.05723929e-09f, 1.50757352e-09f, 2.13744796e-09f, 3.01344401e-09f, 4.22492806e-09f, + 5.89117093e-09f, 8.17046854e-09f, 1.12717587e-08f, 1.54693324e-08f, 2.11213594e-08f, 2.86930859e-08f, + 3.87857241e-08f, 5.21722335e-08f, 6.98414017e-08f, 9.30518593e-08f, 1.23397923e-07f, 1.62889442e-07f, + 2.14048123e-07f, 2.80023159e-07f, 3.64729321e-07f, 4.73011070e-07f, 6.10836627e-07f, 7.85526363e-07f, + 1.00602028e-06f, 1.28318979e-06f, 1.63019938e-06f, 2.06292424e-06f, 2.60043021e-06f, 3.26552286e-06f, + 4.08537275e-06f, 5.09222413e-06f, 6.32419483e-06f, 7.82617466e-06f, 9.65083023e-06f, 1.18597236e-05f, + 1.45245521e-05f, 
1.77285168e-05f, 2.15678251e-05f, 2.61533347e-05f, 3.16123436e-05f, 3.80905295e-05f, + 4.57540432e-05f, 5.47917575e-05f, 6.54176707e-05f, 7.78734661e-05f, 9.24312223e-05f, 1.09396271e-04f, + 1.29110197e-04f, 1.51953965e-04f, 1.78351176e-04f, 2.08771424e-04f, 2.43733750e-04f, 2.83810168e-04f, + 3.29629253e-04f, 3.81879756e-04f, 4.41314233e-04f, 5.08752659e-04f, 5.85085996e-04f, 6.71279692e-04f, + 7.68377076e-04f, 8.77502620e-04f, 9.99865030e-04f, 1.13676015e-03f, 1.28957360e-03f, 1.45978322e-03f, + 1.64896113e-03f, 1.85877551e-03f, 2.09099200e-03f, 2.34747474e-03f, 2.63018699e-03f, 2.94119122e-03f, + 3.28264890e-03f, 3.65681963e-03f, 4.06605991e-03f, 4.51282135e-03f, 4.99964828e-03f, 5.52917497e-03f, + 6.10412222e-03f, 6.72729343e-03f, 7.40157020e-03f, 8.12990738e-03f, 8.91532760e-03f, 9.76091537e-03f, + 1.06698107e-02f, 1.16452023e-02f, 1.26903202e-02f, 1.38084285e-02f, 1.50028172e-02f, 1.62767940e-02f, + 1.76336759e-02f, 1.90767806e-02f, 2.06094173e-02f, 2.22348784e-02f, 2.39564300e-02f, 2.57773028e-02f, + 2.77006834e-02f, 2.97297055e-02f, 3.18674406e-02f, 3.41168899e-02f, 3.64809756e-02f, 3.89625331e-02f, + 4.15643030e-02f, 4.42889240e-02f, 4.71389254e-02f, 5.01167213e-02f, 5.32246039e-02f, 5.64647382e-02f, + 5.98391571e-02f, 6.33497571e-02f, 6.69982939e-02f, 7.07863800e-02f, 7.47154815e-02f, 7.87869165e-02f, + 8.30018539e-02f, 8.73613125e-02f, 9.18661613e-02f, 9.65171203e-02f, 1.01314762e-01f, 1.06259513e-01f, + 1.11351656e-01f, 1.16591337e-01f, 1.21978563e-01f, 1.27513213e-01f, 1.33195039e-01f, 1.39023671e-01f, + 1.44998628e-01f, 1.51119321e-01f, 1.57385061e-01f, 1.63795066e-01f, 1.70348473e-01f, 1.77044340e-01f, + 1.83881662e-01f, 1.90859375e-01f, 1.97976367e-01f, 2.05231492e-01f, 2.12623572e-01f, 2.20151415e-01f, + 2.27813822e-01f, 2.35609599e-01f, 2.43537565e-01f, 2.51596569e-01f, 2.59785494e-01f, 2.68103274e-01f, + 2.76548903e-01f, 2.85121445e-01f, 2.93820047e-01f, 3.02643950e-01f, 3.11592502e-01f, 3.20665165e-01f, + 3.29861530e-01f, 3.39181328e-01f, 3.48624439e-01f, 3.58190905e-01f, 3.67880941e-01f, 3.77694943e-01f, + 3.87633504e-01f, 3.97697421e-01f, 4.07887708e-01f, 4.18205605e-01f, 4.28652591e-01f, 4.39230391e-01f, + 4.49940993e-01f, 4.60786652e-01f, 4.71769905e-01f, 4.82893580e-01f, 4.94160809e-01f, 5.05575036e-01f, + 5.17140031e-01f, 5.28859900e-01f, 5.40739096e-01f, 5.52782432e-01f, 5.64995090e-01f, 5.77382639e-01f, + 5.89951040e-01f, 6.02706666e-01f, 6.15656310e-01f, 6.28807202e-01f, 6.42167019e-01f, 6.55743908e-01f, + 6.69546490e-01f, 6.83583887e-01f, 6.97865729e-01f, 7.12402181e-01f, 7.27203953e-01f, 7.42282322e-01f, + 7.57649155e-01f, 7.73316926e-01f, 7.89298740e-01f, 8.05608358e-01f, 8.22260217e-01f, 8.39269463e-01f, + 8.56651970e-01f, 8.74424378e-01f, 8.92604116e-01f, 9.11209442e-01f, 9.30259469e-01f, 9.49774208e-01f, + 9.69774604e-01f, 9.90282579e-01f, 1.01132107e+00f, 1.03291408e+00f, 1.05508673e+00f, 1.07786529e+00f, + 1.10127728e+00f, 1.12535146e+00f, 1.15011796e+00f, 1.17560829e+00f, 1.20185546e+00f, 1.22889400e+00f, + 1.25676010e+00f, 1.28549162e+00f, 1.31512826e+00f, 1.34571158e+00f, 1.37728514e+00f, 1.40989460e+00f, + 1.44358784e+00f, 1.47841507e+00f, 1.51442894e+00f, 1.55168471e+00f, 1.59024039e+00f, 1.63015687e+00f, + 1.67149810e+00f, 1.71433126e+00f, 1.75872698e+00f, 1.80475947e+00f, 1.85250679e+00f, 1.90205105e+00f, + 1.95347869e+00f, 2.00688065e+00f, 2.06235275e+00f, 2.11999592e+00f, 2.17991652e+00f, 2.24222670e+00f, + 2.30704472e+00f, 2.37449538e+00f, 2.44471039e+00f, 2.51782884e+00f, 2.59399766e+00f, 2.67337209e+00f, + 2.75611628e+00f, 2.84240383e+00f, 
2.93241843e+00f, 3.02635449e+00f, 3.12441791e+00f, 3.22682682e+00f, + 3.33381238e+00f, 3.44561973e+00f, 3.56250887e+00f, 3.68475574e+00f, 3.81265333e+00f, 3.94651282e+00f, + 4.08666490e+00f, 4.23346116e+00f, 4.38727553e+00f, 4.54850596e+00f, 4.71757611e+00f, 4.89493722e+00f, + 5.08107015e+00f, 5.27648761e+00f, 5.48173646e+00f, 5.69740032e+00f, 5.92410235e+00f, 6.16250823e+00f, + 6.41332946e+00f, 6.67732689e+00f, 6.95531455e+00f, 7.24816384e+00f, 7.55680807e+00f, 7.88224735e+00f, + 8.22555401e+00f, 8.58787841e+00f, 8.97045530e+00f, 9.37461076e+00f, 9.80176975e+00f, 1.02534643e+01f, + 1.07313428e+01f, 1.12371793e+01f, 1.17728848e+01f, 1.23405187e+01f, 1.29423019e+01f, 1.35806306e+01f, + 1.42580922e+01f, 1.49774818e+01f, 1.57418213e+01f, 1.65543795e+01f, 1.74186947e+01f, 1.83385994e+01f, + 1.93182476e+01f, 2.03621450e+01f, 2.14751816e+01f, 2.26626686e+01f, 2.39303784e+01f, 2.52845893e+01f, + 2.67321348e+01f, 2.82804577e+01f, 2.99376708e+01f, 3.17126238e+01f, 3.36149769e+01f, 3.56552840e+01f, + 3.78450835e+01f, 4.01970005e+01f, 4.27248599e+01f, 4.54438126e+01f, 4.83704762e+01f, 5.15230921e+01f, + 5.49217006e+01f, 5.85883374e+01f, 6.25472527e+01f, 6.68251567e+01f, 7.14514957e+01f, 7.64587609e+01f, + 8.18828353e+01f, 8.77633847e+01f, 9.41442967e+01f, 1.01074176e+02f, 1.08606902e+02f, 1.16802259e+02f, + 1.25726650e+02f, 1.35453899e+02f, 1.46066166e+02f, 1.57654979e+02f, 1.70322410e+02f, 1.84182406e+02f, + 1.99362306e+02f, 2.16004568e+02f, 2.34268740e+02f, 2.54333703e+02f, 2.76400239e+02f, 3.00693971e+02f, + 3.27468728e+02f, 3.57010397e+02f, 3.89641362e+02f, 4.25725590e+02f, 4.65674502e+02f, 5.09953726e+02f, + 5.59090900e+02f, 6.13684688e+02f, 6.74415211e+02f, 7.42056139e+02f, 8.17488717e+02f, 9.01718069e+02f, + 9.95892168e+02f, 1.10132394e+03f, 1.21951707e+03f, 1.35219615e+03f, 1.50134197e+03f, 1.66923291e+03f, + 1.85849349e+03f, 2.07215152e+03f, 2.31370536e+03f, 2.58720328e+03f, 2.89733724e+03f, 3.24955383e+03f, + 3.65018587e+03f, 4.10660860e+03f, 4.62742547e+03f, 5.22268956e+03f, 5.90416786e+03f, 6.68565726e+03f, + 7.58336313e+03f, 8.61635357e+03f, 9.80710572e+03f, 1.11821637e+04f, 1.27729327e+04f, 1.46166396e+04f, + 1.67574960e+04f, 1.92481112e+04f, 2.21512104e+04f, 2.55417295e+04f, 2.95093735e+04f, 3.41617487e+04f, + 3.96282043e+04f, 4.60645561e+04f, 5.36589049e+04f, 6.26388223e+04f, 7.32802431e+04f, 8.59184957e+04f, + 1.00962017e+05f, 1.18909442e+05f, 1.40370957e+05f, 1.66095034e+05f, 1.97001996e+05f, 2.34226253e+05f, + 2.79169596e+05f, 3.33568603e+05f, 3.99580125e+05f, 4.79889989e+05f, 5.77851588e+05f, 6.97663062e+05f, + 8.44594440e+05f, 1.02527965e+06f, 1.24809298e+06f, 1.52363581e+06f, 1.86536786e+06f, 2.29042802e+06f, + 2.82070529e+06f, 3.48424008e+06f, 4.31706343e+06f, 5.36561882e+06f, 6.68996113e+06f, 8.36799594e+06f, + 1.05011160e+07f, 1.32217203e+07f, 1.67032788e+07f, 2.11738506e+07f, 2.69343047e+07f, 3.43829654e+07f, + 4.40490690e+07f, 5.66383460e+07f, 7.30953564e+07f, 9.46890531e+07f, 1.23130681e+08f, 1.60736861e+08f, + 2.10656057e+08f, 2.77184338e+08f, 3.66207397e+08f, 4.85821891e+08f, 6.47212479e+08f, 8.65895044e+08f, + 1.16348659e+09f, 1.57023596e+09f, 2.12865840e+09f, 2.89877917e+09f, 3.96573294e+09f, 5.45082863e+09f, + 7.52773593e+09f, 1.04462776e+10f, 1.45675716e+10f, 2.04161928e+10f, 2.87579864e+10f, 4.07167363e+10f, + 5.79499965e+10f, 8.29154750e+10f, 1.19276754e+11f, 1.72524570e+11f, 2.50933409e+11f, 3.67042596e+11f, + 5.39962441e+11f, 7.98985690e+11f, 1.18927611e+12f, 1.78088199e+12f, 2.68310388e+12f, 4.06753710e+12f, + 6.20525592e+12f, 9.52719664e+12f, 1.47228407e+13f, 
2.29025392e+13f, 3.58662837e+13f, 5.65517100e+13f,
+ 8.97859411e+13f, 1.43556057e+14f, 2.31171020e+14f, 3.74966777e+14f, 6.12702071e+14f, 1.00868013e+15f,
+ 1.67323268e+15f, 2.79711270e+15f, 4.71267150e+15f, 8.00353033e+15f, 1.37027503e+16f, 2.36538022e+16f,
+ 4.11734705e+16f, 7.22793757e+16f, 1.27982244e+17f, 2.28603237e+17f, 4.11976277e+17f, 7.49169358e+17f,
+ 1.37488861e+18f, 2.54681529e+18f, 4.76248383e+18f, 8.99167123e+18f, 1.71428840e+19f, 3.30088717e+19f,
+ 6.42020070e+19f, 1.26155602e+20f, 2.50480806e+20f, 5.02601059e+20f, 1.01935525e+21f, };
+
+__constant__ float* m_weights_float[8] = {
+ m_weights_float_1,
+ m_weights_float_2,
+ m_weights_float_3,
+ m_weights_float_4,
+ m_weights_float_5,
+ m_weights_float_6,
+ m_weights_float_7,
+ m_weights_float_8
+};
+
+__constant__ double m_abscissas_double_1[13] =
+ { 7.241670621354483269e-163, 2.257639733856759198e-60, 1.153241619257215165e-22, 8.747691973876861825e-09,
+ 1.173446923800022477e-03, 1.032756936219208144e-01, 7.719261204224504866e-01, 4.355544675823585545e+00,
+ 1.215101039066652656e+02, 6.228845436711506169e+05, 6.278613977336989392e+15, 9.127414935180233465e+42,
+ 6.091127771174027909e+116, };
+
+__constant__ double m_abscissas_double_2[12] =
+ { 4.547459836328942014e-99, 6.678756542928857080e-37, 5.005042973041566360e-14, 1.341318484151208960e-05,
+ 1.833875636365939263e-02, 3.257972971286326131e-01, 1.712014688483495078e+00, 1.613222549264089627e+01,
+ 3.116246745274236447e+03, 3.751603952020919663e+09, 1.132259067258797346e+26, 6.799257464097374238e+70, };
+
+__constant__ double m_abscissas_double_3[25] =
+ { 5.314690663257815465e-127, 2.579830034615362946e-77, 3.534801062399966878e-47, 6.733941646704537777e-29,
+ 8.265803726974829043e-18, 4.424914371157762285e-11, 5.390411046738629465e-07, 1.649389713333761449e-04,
+ 5.463728936866216652e-03, 4.787896410534771955e-02, 1.931544616590306846e-01, 5.121421856617965197e-01,
+ 1.144715949265016019e+00, 2.648424684387670480e+00, 7.856804169938798917e+00, 3.944731803343517708e+01,
+ 5.060291993016831194e+02, 3.181117494063683297e+04, 2.820174654949211729e+07, 1.993745099515255184e+12,
+ 1.943469269499068563e+20, 2.858803732300638372e+33, 1.457292199029008637e+55, 8.943565831706355607e+90,
+ 9.016198369791554655e+149, };
+
+__constant__ double m_abscissas_double_4[49] =
+ { 8.165631636299519857e-144, 3.658949309353149331e-112, 1.635242513882908826e-87, 2.578381184977746454e-68,
+ 2.305546416275824199e-53, 1.016725540031465162e-41, 1.191823622917539774e-32, 1.379018088205016509e-25,
+ 4.375640088826073184e-20, 8.438464631330991606e-16, 1.838483310261119782e-12, 7.334264181393092650e-10,
+ 7.804740587931068021e-08, 2.970395577741681504e-06, 5.081805431666579484e-05, 4.671401627620431498e-04,
+ 2.652347404231090523e-03, 1.037409202661683856e-02, 3.045225582205323946e-02, 7.178280364982721201e-02,
+ 1.434001065841990688e-01, 2.535640852949085796e-01, 4.113268917643175920e-01, 6.310260805648534613e-01,
+ 9.404706503455087817e-01, 1.396267301972783068e+00, 2.116896928689963277e+00, 3.364289290471596568e+00,
+ 5.770183960005836987e+00, 1.104863531218761752e+01, 2.460224479439805859e+01, 6.699316387888639988e+01,
+ 2.375794092475844708e+02, 1.188092202760116066e+03, 9.269848635975416108e+03, 1.283900116155671304e+05,
+ 3.723397798030112514e+06, 2.793667983952389721e+08, 7.112973790863854188e+10, 8.704037695808749572e+13,
+ 8.001474015782459984e+17, 9.804091819390540578e+22, 3.342777673392873288e+29, 8.160092668471508447e+37,
+ 4.798775331663586528e+48, 3.228614320248853938e+62,
1.836986041572136151e+80, 1.153145986877483804e+103, + 2.160972586723647751e+132, }; + +__constant__ double m_abscissas_double_5[98] = + { 4.825077401709435655e-153, 3.813781211050297560e-135, 2.377824349780240844e-119, 2.065817295388293122e-105, + 4.132105770181358886e-93, 2.963965169989404311e-82, 1.127296662046635391e-72, 3.210346399945695041e-64, + 9.282992368222161062e-57, 3.565977853916619677e-50, 2.306962519220473637e-44, 3.098751038516535098e-39, + 1.039558064722960891e-34, 1.025256027381235200e-30, 3.432612000569885403e-27, 4.429681881379089961e-24, + 2.464589267395236846e-21, 6.526691446363344923e-19, 8.976892324445928684e-17, 6.926277695183452225e-15, + 3.208805316815751272e-13, 9.478053068835988899e-12, 1.882052586691155400e-10, 2.632616062773909009e-09, + 2.703411837703917665e-08, 2.113642195965330965e-07, 1.299327029813074013e-06, 6.461189935136030673e-06, + 2.665090959570723827e-05, 9.322774986189288194e-05, 2.820463407940068813e-04, 7.508613300035051413e-04, + 1.786142185986551786e-03, 3.848376610765768211e-03, 7.600810651854199771e-03, 1.390873269178271700e-02, + 2.380489559528694982e-02, 3.842796337748997654e-02, 5.895012901671883992e-02, 8.651391160689367948e-02, + 1.221961347398101671e-01, 1.670112314557845555e-01, 2.219593619059930701e-01, 2.881200442770917241e-01, + 3.667906976948184315e-01, 4.596722879563388211e-01, 5.691113093602836208e-01, 6.984190600916228379e-01, + 8.523070690462583711e-01, 1.037505121571600249e+00, 1.263672635742961915e+00, 1.544788480334120896e+00, + 1.901333876886441433e+00, 2.363816272813317635e+00, 2.978614980117902904e+00, 3.817957977526709364e+00, + 4.997477803461245639e+00, 6.708150685706236545e+00, 9.276566033183386532e+00, 1.328332469239125539e+01, + 1.980618680552458639e+01, 3.094452809319702849e+01, 5.101378787119006225e+01, 8.943523638413590523e+01, + 1.682138665185088325e+02, 3.427988270281270587e+02, 7.653823671943767281e+02, 1.895993667030670343e+03, + 5.285404568827643942e+03, 1.684878049282191210e+04, 6.254388805482299369e+04, 2.759556544455721132e+05, + 1.481213238071008345e+06, 9.929728611179601424e+06, 8.564987764771851841e+07, 9.831650826344826952e+08, + 1.560339073978569502e+10, 3.575098885016726922e+11, 1.241973798101884982e+13, 6.915106205748805839e+14, + 6.571419716645131084e+16, 1.144558033138694099e+19, 3.960915669532823553e+21, 2.984410558028297842e+24, + 5.430494850258846715e+27, 2.683747612498502676e+31, 4.114885708325522701e+35, 2.276004816861421600e+40, + 5.387544917595833246e+45, 6.623575732955432303e+51, 5.266881304835239338e+58, 3.473234812654772210e+66, + 2.517492645985977377e+75, 2.759797646289240629e+85, 6.569603829502412077e+96, 5.116181648220647995e+109, + 2.073901892339407423e+124, 7.406462446666255838e+140, }; + +__constant__ double m_abscissas_double_6[196] = + { 7.053618140948655098e-158, 2.343354218558056628e-148, 2.062509087689351439e-139, 5.212388628332260488e-131, + 4.079380320868843387e-123, 1.061481285006738214e-115, 9.816727607793017691e-109, 3.435400719609722581e-102, + 4.825198574681495574e-96, 2.874760995089533358e-90, 7.652499977338879996e-85, 9.556944498127119032e-80, + 5.862241023038227937e-75, 1.843934000129616663e-70, 3.096983980846232911e-66, 2.885057452402340330e-62, + 1.544904681826443837e-58, 4.917572705671511534e-55, 9.602608566391652866e-52, 1.184882375237471009e-48, + 9.499223316355714793e-46, 5.078965858882528461e-43, 1.856080838373584123e-40, 4.744245560917271585e-38, + 8.667497891102658240e-36, 1.155086178652063612e-33, 1.144541329818836153e-31, 8.585083084065812874e-30, + 
4.957702933032408922e-28, 2.239353794616277882e-26, 8.030405447708765492e-25, 2.318459271131684362e-23, + 5.460287296679086677e-22, 1.062054307071706375e-20, 1.725955878033239909e-19, 2.369168446274347137e-18, + 2.775176063916613602e-17, 2.800847352316621903e-16, 2.457625954357892245e-15, 1.890842052364646528e-14, + 1.285791209258834942e-13, 7.786001004707878219e-13, 4.228083024410741194e-12, 2.072664297543567489e-11, + 9.229295073519997559e-11, 3.754886152592311575e-10, 1.403443871774813834e-09, 4.843962757371872495e-09, + 1.551373196623161433e-08, 4.631448362339623514e-08, 1.294370176865168120e-07, 3.400050664017164356e-07, + 8.426290307581447654e-07, 1.977205177561996033e-06, 4.407362363338667830e-06, 9.362197325373404563e-06, + 1.900760383449277992e-05, 3.698530963711860636e-05, 6.915333419235766653e-05, 1.245492076251852927e-04, + 2.165764713808099093e-04, 3.643870211078977292e-04, 5.943999416122372516e-04, 9.418663022314558591e-04, + 1.452364274261880083e-03, 2.183094846035196562e-03, 3.203848855069215278e-03, 4.597532353031862490e-03, + 6.460168315117479792e-03, 8.900334989802041559e-03, 1.203804973137064275e-02, 1.600315622064554965e-02, + 2.093331703849583304e-02, 2.697174812170771748e-02, 3.426485378063329473e-02, 4.295992956149806344e-02, + 5.320309587203163231e-02, 6.513760993479510261e-02, 7.890268021756337834e-02, 9.463287940877026649e-02, + 1.124582226719385153e-01, 1.325049504086213973e-01, 1.548970316076579260e-01, 1.797583869192584860e-01, + 2.072158210677632145e-01, 2.374026527414815016e-01, 2.704630368855767324e-01, 3.065569893452247137e-01, + 3.458661469783558388e-01, 3.886003277325320632e-01, 4.350049951304795319e-01, 4.853697810067132707e-01, + 5.400382807495678589e-01, 5.994194092045578293e-01, 6.640006964388650918e-01, 7.343640159321037167e-01, + 8.112043806284638130e-01, 8.953526245122194172e-01, 9.878030224123093447e-01, 1.089747207002141516e+00, + 1.202616144679226559e+00, 1.328132465995424226e+00, 1.468376159872979355e+00, 1.625867601500928277e+00, + 1.803673186618691186e+00, 2.005540624723209206e+00, 2.236073393446881709e+00, 2.500957254018255004e+00, + 2.807256477663534857e+00, 3.163804128101147487e+00, 3.581720263742550029e+00, 4.075105576391566303e+00, + 4.661977749936137761e+00, 5.365546718714963091e+00, 6.215967676434536043e+00, 7.252774367330402583e+00, + 8.528291278204291331e+00, 1.011247001122720391e+01, 1.209982167952718578e+01, 1.461947158782994207e+01, + 1.784992423404041042e+01, 2.204102944968352178e+01, 2.754711235628932374e+01, 3.487766600641650640e+01, + 4.477610230214251576e+01, 5.834406132739843834e+01, 7.724096630394042216e+01, 1.040101075374387191e+02, + 1.426215523101601730e+02, 1.993940974645466479e+02, 2.845939167898235356e+02, 4.152683836292551147e+02, + 6.203878718481709769e+02, 9.504080873581791535e+02, 1.495523342124078853e+03, 2.421485328006836634e+03, + 4.041977218227396500e+03, 6.969453497454785202e+03, 1.244001690461442846e+04, 2.303794930506892099e+04, + 4.437240927040385250e+04, 8.911296561746717657e+04, 1.871159398849787994e+05, 4.119851492265743330e+05, + 9.540971729944126398e+05, 2.331680521880789706e+06, 6.034305391011695472e+06, 1.659896369452266448e+07, + 4.872448839341613053e+07, 1.532687586549090392e+08, 5.189730792935011722e+08, 1.900599621040508288e+09, + 7.566480431232731818e+09, 3.292298322781643849e+10, 1.574714421665075635e+11, 8.330244306239795892e+11, + 4.905619969814187571e+12, 3.238316002757222702e+13, 2.413995281454699076e+14, 2.048115587426077343e+15, + 1.994352670766892066e+16, 2.248750566422739144e+17, 
2.964037541992353401e+18, 4.613233119968213445e+19, + 8.569680508342001161e+20, 1.921851711942844799e+22, 5.266829246099861758e+23, 1.786779952992288976e+25, + 7.607919705736976491e+26, 4.125721424346450007e+28, 2.894340142292214313e+30, 2.670720269656428272e+32, + 3.299248229135205151e+34, 5.560105583582310103e+36, 1.304167266599523020e+39, 4.349382146382717353e+41, + 2.109720387774341509e+44, 1.524825352702403324e+47, 1.684941265105084589e+50, 2.925572737558413426e+53, + 8.217834961057481281e+56, 3.852117991896536784e+60, 3.114452310394384063e+64, 4.498555465873245751e+68, + 1.205113215232800796e+73, 6.230864727145221322e+77, 6.487131248948465269e+82, 1.422810109167834249e+88, + 6.897656089181724717e+93, 7.779163462756485195e+99, 2.155213251859555072e+106, 1.554347160152705281e+113, + 3.103875072425192272e+120, 1.832673821557018634e+128, 3.431285951865278376e+136, 2.194542081542393530e+145, }; + +__constant__ double m_abscissas_double_7[393] = + { 2.363803632659058081e-160, 1.926835442612677686e-155, 1.109114905180506786e-150, 4.556759282087534164e-146, + 1.350172241067816232e-141, 2.914359263635229435e-137, 4.627545976953585825e-133, 5.456508344460398758e-129, + 4.821828861306345485e-125, 3.221779152402086241e-121, 1.641732102111619421e-117, 6.433569189921227126e-114, + 1.954582672700428961e-110, 4.639912078942456372e-107, 8.671928891742699827e-104, 1.285485264305858782e-100, + 1.522161801460927566e-97, 1.449767844425295085e-94, 1.118122255504445235e-91, 7.028344777398825069e-89, + 3.623454064991238081e-86, 1.541513438874996543e-83, 5.443699502170284982e-81, 1.604913673768949456e-78, + 3.972206240977317536e-76, 8.297975554162539562e-74, 1.470748835855054032e-71, 2.222935801472624670e-69, + 2.879160361851977720e-67, 3.210837413250902178e-65, 3.097303984958235490e-63, 2.595974479763180595e-61, + 1.898656799199089593e-59, 1.216865518398435626e-57, 6.862041810601184397e-56, 3.418134121780773218e-54, + 1.509758535747580387e-52, 5.934924977563731784e-51, 2.083865009061241099e-49, 6.558128104492290092e-48, + 1.856133016606468181e-46, 4.739964621828176249e-45, 1.095600459825324697e-43, 2.299177139060262518e-42, + 4.393663812095906869e-41, 7.667728102142858487e-40, 1.225476279042445010e-38, 1.798526997315960782e-37, + 2.430201154741018716e-36, 3.030993518975438712e-35, 3.497966609954172613e-34, 3.744308272796551045e-33, + 3.726132797819332658e-32, 3.455018936399215381e-31, 2.991524108706319604e-30, 2.423818520801870809e-29, + 1.841452809687011486e-28, 1.314419760826235421e-27, 8.831901010260867670e-27, 5.596660060604091621e-26, + 3.350745417080507841e-25, 1.898675566025820409e-24, 1.019982287418197376e-23, 5.203315082978366918e-23, + 2.524668746906057148e-22, 1.166904646009344233e-21, 5.145437675264868732e-21, 2.167677145279166596e-20, + 8.736996911006110678e-20, 3.373776431076593266e-19, 1.249769727462160008e-18, 4.446913832647864892e-18, + 1.521741180930875343e-17, 5.014158301377399707e-17, 1.592708205361177316e-16, 4.882536933653862982e-16, + 1.446109387544416586e-15, 4.142510168443201880e-15, 1.148892083132325407e-14, 3.088024760858924214e-14, + 8.051699653634442236e-14, 2.038478329249539199e-13, 5.015686309363884049e-13, 1.200444984849900298e-12, + 2.797125428309156462e-12, 6.350357793399881333e-12, 1.405881744263466936e-11, 3.037391821635123795e-11, + 6.408863411016101449e-11, 1.321618431565916164e-10, 2.665526566207284474e-10, 5.261497418654313068e-10, + 1.017123184766088896e-09, 1.926882221639203388e-09, 3.579523428497157488e-09, 6.524486847652635035e-09, + 1.167543991262942921e-08, 
2.052356080018121741e-08, 3.545879678923676129e-08, 6.024472481556065885e-08, + 1.007076869023518125e-07, 1.657191565891799652e-07, 2.685718943404479677e-07, 4.288752213761154116e-07, + 6.751222405372943925e-07, 1.048111270324302884e-06, 1.605433960692314060e-06, 2.427271958412371013e-06, + 3.623770645356477660e-06, 5.344280132492750309e-06, 7.788767891027678939e-06, 1.122171160022519082e-05, + 1.598877254198599908e-05, 2.253652700952153115e-05, 3.143549403208496646e-05, 4.340664122305257288e-05, + 5.935147653125578529e-05, 8.038574285450253209e-05, 1.078766266062957565e-04, 1.434832731669987826e-04, + 1.892002753957224677e-04, 2.474036705329449166e-04, 3.208988510028906069e-04, 4.129696713145546995e-04, + 5.274279220384250390e-04, 6.686622480794640482e-04, 8.416855170641220285e-04, 1.052179598744440400e-03, + 1.306536501050643762e-03, 1.611894824798787196e-03, 1.976170547826080496e-03, 2.408081229927640721e-03, + 2.917162840577481875e-03, 3.513778549028205519e-03, 4.209118976964403112e-03, 5.015193592567630665e-03, + 5.944813116164644191e-03, 7.011563005746090924e-03, 8.229768289624073049e-03, 9.614450207543986041e-03, + 1.118127530523730813e-02, 1.294649779580742160e-02, 1.492689615029751590e-02, 1.713970500593860526e-02, + 1.960254358145296755e-02, 2.233334186285684056e-02, 2.535026586984720664e-02, 2.867164333232700310e-02, + 3.231589109997912964e-02, 3.630144557680610965e-02, 4.064669741956638109e-02, 4.536993166688766414e-02, + 5.048927437769432941e-02, 5.602264675683979161e-02, 6.198772763597769678e-02, 6.840192506222012774e-02, + 7.528235762939712171e-02, 8.264584606994605986e-02, 9.050891551257121825e-02, 9.888780870447738360e-02, + 1.077985103995250356e-01, 1.172567830270636607e-01, 1.272782136821146663e-01, 1.378782724173011162e-01, + 1.490723817714478840e-01, 1.608759974398061173e-01, 1.733046999768424060e-01, 1.863742974247175786e-01, + 2.001009387790379976e-01, 2.145012382381487190e-01, 2.295924102330349785e-01, 2.453924153016625057e-01, + 2.619201169541956490e-01, 2.791954497739298773e-01, 2.972395991130188526e-01, 3.160751928723792943e-01, + 3.357265060019327741e-01, 3.562196785212496373e-01, 3.775829480426418792e-01, 3.998468979800887046e-01, + 4.230447228497335035e-01, 4.472125123131631074e-01, 4.723895558858634018e-01, 4.986186705332947608e-01, + 5.259465537097384485e-01, 5.544241647649479754e-01, 5.841071380560416511e-01, 6.150562315632864018e-01, + 6.473378153258308278e-01, 6.810244045956889952e-01, 7.161952432654565143e-01, 7.529369438691556459e-01, + 7.913441913000366617e-01, 8.315205183502086596e-01, 8.735791622734589226e-01, 9.176440128265773576e-01, + 9.638506636817484398e-01, 1.012347580753402101e+00, 1.063297402882930381e+00, 1.116878392515788506e+00, + 1.173286056537125469e+00, 1.232734960362603918e+00, 1.295460761779549539e+00, 1.361722494981910846e+00, + 1.431805139837984876e+00, 1.506022516788234345e+00, 1.584720554029819354e+00, 1.668280980969603645e+00, + 1.757125510515793421e+00, 1.851720582866847453e+00, 1.952582755329533200e+00, 2.060284836698905963e+00, + 2.175462881275503983e+00, 2.298824177179966629e+00, 2.431156386859774759e+00, 2.573338025304717222e+00, + 2.726350494395667363e+00, 2.891291931102408784e+00, 3.069393174263124520e+00, 3.262036211067640944e+00, + 3.470775532153801919e+00, 3.697362905908153155e+00, 3.943776181224350319e+00, 4.212252847439515687e+00, + 4.505329225191826639e+00, 4.825886338442190807e+00, 5.177203733275742875e+00, 5.563022772612923373e+00, + 5.987621259260909859e+00, 6.455901637501497370e+00, 6.973495514195020291e+00, 
7.546887847708181032e+00, + 8.183564906772872855e+00, 8.892191039842283431e+00, 9.682820467523296204e+00, 1.056715177903931837e+01, + 1.155883465937652851e+01, 1.267384070151528947e+01, 1.393091310389918289e+01, 1.535211379418177923e+01, + 1.696349128797309510e+01, 1.879589868990482198e+01, 2.088599907466058846e+01, 2.327750557804054323e+01, + 2.602271658731131093e+01, 2.918442338619305962e+01, 3.283828974258811174e+01, 3.707583192189045823e+01, + 4.200816575721451990e+01, 4.777073782243997224e+01, 5.452932468101429049e+01, 6.248767344468634478e+01, + 7.189727649240108469e+01, 8.306993427631743111e+01, 9.639397813652482031e+01, 1.123553215857374919e+02, + 1.315649140340119335e+02, 1.547947284376312334e+02, 1.830251850988715552e+02, 2.175079854175568113e+02, + 2.598498278995140400e+02, 3.121245867818556035e+02, 3.770245173783702458e+02, 4.580653020257635092e+02, + 5.598658426219653689e+02, 6.885324967857802403e+02, 8.521902266884453403e+02, 1.061721815114114004e+03, + 1.331803836529085656e+03, 1.682368940494210217e+03, 2.140685129891926443e+03, 2.744334847491432747e+03, + 3.545516659371773357e+03, 4.617306735234797694e+03, 6.062848530677391758e+03, 8.028955134017154634e+03, + 1.072641999277462936e+04, 1.446061873485939411e+04, 1.967804579389513789e+04, 2.703776201447279367e+04, + 3.752217148194723312e+04, 5.261052412010591097e+04, 7.455350923854624329e+04, 1.068125318497402759e+05, + 1.547702528541975911e+05, 2.268930751685412563e+05, 3.366554971645478061e+05, 5.057644049026088560e+05, + 7.696291826884134742e+05, 1.186761864945790800e+06, 1.855146094294667715e+06, 2.941132644236832276e+06, + 4.731169740596920355e+06, 7.725765147199987935e+06, 1.281272565991955126e+07, 2.159151785284808339e+07, + 3.699029448836502904e+07, 6.445902263727884020e+07, 1.143158678867853615e+08, 2.064425450996979446e+08, + 3.798502995329785506e+08, 7.125329484929003007e+08, 1.363463294023391629e+09, 2.663196590686555077e+09, + 5.313347815419462975e+09, 1.083506369700027396e+10, 2.259930737910197667e+10, 4.824707991473375387e+10, + 1.055069002818752104e+11, 2.365138040635727209e+11, 5.439266129959972285e+11, 1.284356371641026839e+12, + 3.116424654245920797e+12, 7.777312465656280419e+12, 1.997984843259596733e+13, 5.288649037339853118e+13, + 1.443776937640548342e+14, 4.068967444890414804e+14, 1.185049702391501141e+15, 3.570348091883284324e+15, + 1.113971254034978026e+16, 3.603374982229766184e+16, 1.209803708182151942e+17, 4.220890251904870611e+17, + 1.532169872312865862e+18, 5.793890867821715890e+18, 2.285379920879842924e+19, 9.415714369232187727e+19, + 4.057471211245170887e+20, 1.831405465804324767e+21, 8.671209773404504008e+21, 4.313209261217173994e+22, + 2.257498454242656934e+23, 1.245267136898199709e+24, 7.251536499435180219e+24, 4.465573963364524765e+25, + 2.913233420596266283e+26, 2.017063171206072979e+27, 1.485014353353330393e+28, 1.164811091759882662e+29, + 9.753661264047912784e+29, 8.737124417851167566e+30, 8.390503265508677363e+31, 8.657362701430272680e+32, + 9.619472292454361392e+33, 1.153735498483960294e+35, 1.497284701983562213e+36, 2.107816695320163748e+37, + 3.227106623185610745e+38, 5.387696372515021985e+39, 9.835496017627849225e+40, 1.968904749086105300e+42, + 4.334704147416758275e+43, 1.052717645113369473e+45, 2.829013521120326147e+46, 8.439656297525588822e+47, + 2.804279894508234869e+49, 1.041383695988523864e+51, 4.337366591019718310e+52, 2.033523569151676725e+54, + 1.077238847489773081e+56, 6.472891251891105455e+57, 4.429404678715878536e+59, 3.466135480828349864e+61, + 3.114928656972704276e+63, 
3.228947925415990689e+65, 3.878402486902381042e+67, 5.423187597439531197e+69, + 8.870779393460412583e+71, 1.705832285076755970e+74, 3.876224350373120420e+76, 1.046359534886878004e+79, + 3.373858809560757544e+81, 1.306762499786044015e+84, 6.115300889685679832e+86, 3.478550048884517349e+89, + 2.420073578988056289e+92, 2.072453567501123129e+95, 2.199029867204449277e+98, 2.910868575802139983e+101, + 4.840699137490951163e+104, 1.018669397739170369e+108, 2.733025017438095928e+111, 9.420797277586029837e+114, + 4.205525105722885986e+118, 2.451352708852151939e+122, 1.881577053794165543e+126, 1.918506219134233785e+130, + 2.622069659115564900e+134, 4.848463485415763756e+138, 1.224645005481997780e+143, 4.267387286482591954e+147, + 2.072505613372582377e+152, }; + +__constant__ double m_abscissas_double_8[786] = + { 1.323228129684237783e-161, 4.129002973520822791e-159, 1.178655462569548882e-156, 3.082189008893206231e-154, + 7.393542832199414487e-152, 1.629100644355328639e-149, 3.301545529059822941e-147, 6.162031390854241227e-145, + 1.060528194470986309e-142, 1.685225757497235089e-140, 2.475534097582263629e-138, 3.365764749507587192e-136, + 4.240562683924022383e-134, 4.956794227885611715e-132, 5.381716367914161520e-130, 5.433507172294988849e-128, + 5.107031242794315420e-126, 4.473704932098646394e-124, 3.656376947377888629e-122, 2.791170022694259001e-120, + 1.992200238692415032e-118, 1.330894359393789718e-116, 8.330356767359890503e-115, 4.890256639970245146e-113, + 2.695128935451165447e-111, 1.395829605415630844e-109, 6.799997527188085942e-108, 3.119037767379032293e-106, + 1.348260131419216291e-104, 5.497526018943990804e-103, 2.116384670251198533e-101, 7.699148714858061209e-100, + 2.649065347250598345e-98, 8.628189263549727753e-97, 2.662520943248368922e-95, 7.790698623582886341e-94, + 2.163354866683077281e-92, 5.705576739797220361e-91, 1.430338193028564913e-89, 3.411040781372328747e-88, + 7.744268073516449037e-87, 1.675136564303435813e-85, 3.454795810595704816e-84, 6.798573137099477363e-83, + 1.277474708033782661e-81, 2.293702139426309483e-80, 3.938021700015175030e-79, 6.469593934876300124e-78, + 1.017725266990912471e-76, 1.534019529793324951e-75, 2.216999886838860916e-74, 3.074100747562803362e-73, + 4.092295330837549092e-72, 5.233434175636538471e-71, 6.433506079763357418e-70, 7.607042677901362161e-69, + 8.656714387163425357e-68, 9.486746058685489974e-67, 1.001756724248288397e-65, 1.019853943834854330e-64, + 1.001591106610665630e-63, 9.494277822444263952e-63, 8.691422918891890649e-62, 7.687977047887448276e-61, + 6.574408104196605248e-60, 5.438162502918425191e-59, 4.353340831363003212e-58, 3.374338762181243411e-57, + 2.533770921173042330e-56, 1.844048925248616738e-55, 1.301410812308480184e-54, 8.910466744374470063e-54, + 5.921538384124132331e-53, 3.821356134297705127e-52, 2.395780657353036891e-51, 1.459882187581820236e-50, + 8.650105472076777327e-50, 4.985933550797199316e-49, 2.796911903237435916e-48, 1.527570118993503332e-47, + 8.126314048196993302e-47, 4.212436363948578182e-46, 2.128604050242564662e-45, 1.048938356323431072e-44, + 5.042753142653687842e-44, 2.365999225494165364e-43, 1.083813462091040325e-42, 4.848963367960316169e-42, + 2.119612873737657277e-41, 9.055947139022002648e-41, 3.782987192192666650e-40, 1.545649846917574765e-39, + 6.178909752126026357e-39, 2.417597558625940386e-38, 9.261305999966332746e-38, 3.474712971194656115e-37, + 1.277215890629181345e-36, 4.600938133935473864e-36, 1.624804314773052044e-35, 5.626808103137929972e-35, + 1.911442429947086471e-34, 6.371300415498187125e-34, 
2.084444531309441237e-33, 6.695356060065574234e-33, + 2.112038435637792931e-32, 6.544802906551512393e-32, 1.992864937623987114e-31, 5.964358817764151755e-31, + 1.754973231464949500e-30, 5.078231558861773863e-30, 1.445447866528259475e-29, 4.048099759391660786e-29, + 1.115752878927994221e-28, 3.027334168442338592e-28, 8.087868498106224788e-28, 2.128106544151858936e-27, + 5.516210113930227985e-27, 1.408890921124863906e-26, 3.546520734326774807e-26, 8.800636481096360494e-26, + 2.153319509043984465e-25, 5.196136544731926346e-25, 1.236869058422202190e-24, 2.904891674490918873e-24, + 6.732707317563258763e-24, 1.540253603361391055e-23, 3.478765727687221019e-23, 7.758450079933031976e-23, + 1.708939324269830276e-22, 3.718467010568811152e-22, 7.994094376769029920e-22, 1.698336774318343123e-21, + 3.566214469724002275e-21, 7.402848534866351662e-21, 1.519411719755297549e-20, 3.083993994528608740e-20, + 6.191388817974459809e-20, 1.229625987010589227e-19, 2.416245949308411084e-19, 4.698551818749419706e-19, + 9.042992978848520439e-19, 1.722880198390020817e-18, 3.249832858354112322e-18, 6.070120594586457562e-18, + 1.122871881646098441e-17, 2.057429235664205922e-17, 3.734613207742816399e-17, 6.716694369267842075e-17, + 1.197063025055043952e-16, 2.114419661115663617e-16, 3.702017138231021853e-16, 6.425665498746337860e-16, + 1.105830903726985419e-15, 1.887156051660563224e-15, 3.193979018679125833e-15, 5.361881977473204459e-15, + 8.929318568606692809e-15, 1.475330560958586660e-14, 2.418708636765824964e-14, 3.935078350904051302e-14, + 6.354047096308654479e-14, 1.018416666466509442e-13, 1.620423782999307693e-13, 2.559817517056126166e-13, + 4.015273886294212810e-13, 6.254532358261761291e-13, 9.675981021394182858e-13, 1.486832112534566186e-12, + 2.269557377760486879e-12, 3.441736008766365832e-12, 5.185793859860652413e-12, 7.764217889314004663e-12, + 1.155228105746548036e-11, 1.708313121464262097e-11, 2.510951856086201897e-11, 3.668776978510952341e-11, + 5.329131813941740314e-11, 7.696325397299480856e-11, 1.105200723643722855e-10, 1.578221843796034825e-10, + 2.241309672940976766e-10, 3.165773201144956642e-10, 4.447730510871610704e-10, 6.216041661455164049e-10, + 8.642544905395987868e-10, 1.195519306516659349e-09, 1.645482121417189823e-09, 2.253643612941620883e-09, + 3.071610576496751310e-09, 4.166474690460445927e-09, 5.625036504185181035e-09, 7.559059638953998396e-09, + 1.011177417876491092e-08, 1.346588701906267454e-08, 1.785340092957703350e-08, 2.356759364235337519e-08, + 3.097756373337616088e-08, 4.054581171302714730e-08, 5.284939280085554173e-08, 6.860525247854168448e-08, + 8.870043714076795346e-08, 1.142279599340281637e-07, 1.465291959965373757e-07, 1.872437814520259903e-07, + 2.383680961705324062e-07, 3.023235208219232784e-07, 3.820357732606947876e-07, 4.810267467496160044e-07, + 6.035203917139166314e-07, 7.545643021775656875e-07, 9.401687861337141280e-07, 1.167465314019272078e-06, + 1.444886349199346242e-06, 1.782368666762205796e-06, 2.191582359683820240e-06, 2.686187812137005286e-06, + 3.282122985909738110e-06, 3.997923415034129149e-06, 4.855077333283880469e-06, 5.878418366687560187e-06, + 7.096558206229387964e-06, 8.542361632206236097e-06, 1.025346618920209381e-05, 1.227284870748632855e-05, + 1.464944073127878202e-05, 1.743879474552002742e-05, 2.070380288967650755e-05, 2.451546960924430874e-05, + 2.895373942298085844e-05, 3.410838067694928604e-05, 4.007992581615393488e-05, 4.698066833232878622e-05, + 5.493571614427227251e-05, 6.408410073746518169e-05, 7.457994093551813828e-05, 8.659365970069775654e-05, + 
1.003132518682442285e-04, 1.159456002136906496e-04, 1.337178367385581674e-04, 1.538787455425709779e-04, + 1.767002031351005554e-04, 2.024786515302844608e-04, 2.315365989746650402e-04, 2.642241426787982083e-04, + 3.009205074706080013e-04, 3.420355938637258307e-04, 3.880115286439000550e-04, 4.393242107257947798e-04, + 4.964848447258090522e-04, 5.600414544382562271e-04, 6.305803681962314437e-04, 7.087276679481586600e-04, + 7.951505937892094439e-04, 8.905588956558126794e-04, 9.957061239230124343e-04, 1.111390850739538593e-03, + 1.238457814094548688e-03, 1.377798976832850428e-03, 1.530354493121150144e-03, 1.697113575214988470e-03, + 1.879115253782404405e-03, 2.077449025503311209e-03, 2.293255382179820056e-03, 2.527726216158548279e-03, + 2.782105097477072741e-03, 3.057687418798497807e-03, 3.355820404885606963e-03, 3.677902984083964409e-03, + 4.025385520026097270e-03, 4.399769402530814407e-03, 4.802606497446985045e-03, 5.235498455973840111e-03, + 5.700095884774212336e-03, 6.198097378977308725e-03, 6.731248420937948614e-03, 7.301340148374219834e-03, + 7.910207996239952125e-03, 8.559730217397303903e-03, 9.251826287833445298e-03, 9.988455202809488913e-03, + 1.077161367093554544e-02, 1.160333421372954856e-02, 1.248568317873621646e-02, 1.342075867475355427e-02, + 1.441068843813546585e-02, 1.545762763950860648e-02, 1.656375664055830135e-02, 1.773127871080136402e-02, + 1.896241771447260382e-02, 2.025941577780677588e-02, 2.162453094709917839e-02, 2.306003484797691421e-02, + 2.456821035631025318e-02, 2.615134929114115217e-02, 2.781175013990572523e-02, 2.955171582608151263e-02, + 3.137355152920124081e-02, 3.327956256694509270e-02, 3.527205234875621605e-02, 3.735332041012234938e-02, + 3.952566053633324126e-02, 4.179135898416228534e-02, 4.415269280953487221e-02, 4.661192830883879903e-02, + 4.917131958110712872e-02, 5.183310721786459418e-02, 5.459951712697841302e-02, 5.747275949639657337e-02, + 6.045502790319455825e-02, 6.354849857288828754e-02, 6.675532979350985865e-02, 7.007766148848641979e-02, + 7.351761495191403887e-02, 7.707729274938041525e-02, 8.075877878706524317e-02, 8.456413855143733669e-02, + 8.849541952147546057e-02, 9.255465175496720496e-02, 9.674384865008904765e-02, 1.010650078831426502e-01, + 1.055201125230189472e-01, 1.101111323226840632e-01, 1.148400251877307103e-01, 1.197087388218165293e-01, + 1.247192125486176994e-01, 1.298733793097628269e-01, 1.351731678380792159e-01, 1.406205050053816316e-01, + 1.462173183439629526e-01, 1.519655387409069424e-01, 1.578671033043359383e-01, 1.639239584007306411e-01, + 1.701380628625154331e-01, 1.765113913651907042e-01, 1.830459379734134606e-01, 1.897437198555789051e-01, + 1.966067811666385690e-01, 2.036371970991047974e-01, 2.108370781024367852e-01, 2.182085742712797843e-01, + 2.257538799033364379e-01, 2.334752382279873511e-01, 2.413749463071469410e-01, 2.494553601102403241e-01, + 2.577188997656175820e-01, 2.661680549911833443e-01, 2.748053907075124803e-01, 2.836335528372471376e-01, + 2.926552742951268547e-01, 3.018733811735925662e-01, 3.112907991295277084e-01, 3.209105599783561596e-01, + 3.307358085024083972e-01, 3.407698094811951648e-01, 3.510159549519934555e-01, 3.614777717099542274e-01, + 3.721589290577866932e-01, 3.830632468159621812e-01, 3.941947036053136035e-01, 4.055574454148868711e-01, + 4.171557944689308074e-01, 4.289942584079951543e-01, 4.410775398002453309e-01, 4.534105460003012245e-01, + 4.659983993741692944e-01, 4.788464479101668631e-01, 4.919602762371392109e-01, 5.053457170727489659e-01, + 5.190088631261786795e-01, 5.329560794812372669e-01, 
5.471940164876055195e-01, 5.617296231898020413e-01, + 5.765701613254061793e-01, 5.917232199261468491e-01, 6.071967305576643327e-01, 6.229989832360855492e-01, + 6.391386430620321596e-01, 6.556247676153161584e-01, 6.724668251563812272e-01, 6.896747136835329047e-01, + 7.072587808981804764e-01, 7.252298451337033758e-01, 7.435992173071710726e-01, 7.623787239570054101e-01, + 7.815807314337971290e-01, 8.012181713158943859e-01, 8.213045671260926392e-01, 8.418540624307963733e-01, + 8.628814504084197628e-01, 8.844022049795737430e-01, 9.064325135977815717e-01, 9.289893118061069464e-01, + 9.520903196722039764e-01, 9.757540802219457353e-01, 1.000000000000000000e+00, 1.024848391894543008e+00, + 1.050320520372784475e+00, 1.076438649284173871e+00, 1.103226092399127978e+00, 1.130707266862927052e+00, + 1.158907749757141229e+00, 1.187854337974646084e+00, 1.217575111629048984e+00, 1.248099501235266386e+00, + 1.279458358915164500e+00, 1.311684033900709062e+00, 1.344810452627081143e+00, 1.378873203729832710e+00, + 1.413909628283517352e+00, 1.449958915644490754e+00, 1.487062205287898607e+00, 1.525262695058439148e+00, + 1.564605756286502811e+00, 1.605139056255971231e+00, 1.646912688547541313e+00, 1.689979311822189937e+00, + 1.734394297653598793e+00, 1.780215888066332921e+00, 1.827505363488657555e+00, 1.876327221885466881e+00, + 1.926749369898304239e+00, 1.978843326886336694e+00, 2.032684442834914613e+00, 2.088352131177556992e+00, + 2.145930117663470432e+00, 2.205506706496711366e+00, 2.267175065075584681e+00, 2.331033528772661605e+00, + 2.397185927317806037e+00, 2.465741934479827004e+00, 2.536817442887937264e+00, 2.610534965993323711e+00, + 2.687024069345184956e+00, 2.766421833546071979e+00, 2.848873351459948781e+00, 2.934532262474922666e+00, + 3.023561326873131923e+00, 3.116133043635102211e+00, 3.212430315307524598e+00, 3.312647163894682976e+00, + 3.416989502097797957e+00, 3.525675964626843197e+00, 3.638938804749809967e+00, 3.757024861729272487e+00, + 3.880196605330264341e+00, 4.008733264172298986e+00, 4.142932045347867609e+00, 4.283109453446644399e+00, + 4.429602717916437040e+00, 4.582771338567048147e+00, 4.742998759991079249e+00, 4.910694186746867507e+00, + 5.086294552335034437e+00, 5.270266656314831820e+00, 5.463109485364516396e+00, 5.665356735708146927e+00, + 5.877579556128345480e+00, 6.100389532781943879e+00, 6.334441939256981670e+00, 6.580439277782222274e+00, + 6.839135140254664526e+00, 7.111338420820842566e+00, 7.397917915172903763e+00, 7.699807345544508469e+00, + 8.018010854664294474e+00, 8.353609016702406728e+00, 8.707765418592385473e+00, 9.081733871099147484e+00, + 9.476866315716376006e+00, 9.894621501007146275e+00, 1.033657451045679019e+01, 1.080442723340841910e+01, + 1.130001988133777781e+01, 1.182534366375335115e+01, 1.238255475156052427e+01, 1.297398967101161563e+01, + 1.360218228861306245e+01, 1.426988256684760289e+01, 1.498007729260327644e+01, 1.573601300513857081e+01, + 1.654122137866316500e+01, 1.739954734664685784e+01, 1.831518029132688981e+01, 1.929268866318984532e+01, + 2.033705844217826172e+01, 2.145373590584482942e+01, 2.264867523060898736e+01, 2.392839152177298272e+01, + 2.530001994731418268e+01, 2.677138174118011529e+01, 2.835105794560498805e+01, 3.004847188085487195e+01, + 3.187398146713610639e+01, 3.383898267989664904e+01, 3.595602559959535672e+01, 3.823894472392493310e+01, + 4.070300544879345396e+01, 4.336506889917953679e+01, 4.624377760823269784e+01, 4.935976490967979071e+01, + 5.273589133292714765e+01, 5.639751178186770847e+01, 6.037277784867852275e+01, 6.469298027622754351e+01, + 
6.939293735292118365e+01, 7.451143592061966836e+01, 8.009173272176674066e+01, 8.618212503236856949e+01, + 9.283660095406551480e+01, 1.001155814082968890e+02, 1.080867678325352448e+02, 1.168261118752949279e+02, + 1.264189260858047240e+02, 1.369611577708331715e+02, 1.485608519349011866e+02, 1.613398336385932743e+02, + 1.754356453320629017e+02, 1.910037809024609590e+02, 2.082202655019913565e+02, 2.272846389233001078e+02, + 2.484234106336023257e+02, 2.718940668983047258e+02, 2.979897251188232016e+02, 3.270445480633676878e+02, + 3.594400516741229885e+02, 3.956124653087335485e+02, 4.360613334959077953e+02, 4.813595846269808355e+02, + 5.321653357808338203e+02, 5.892357556996862196e+02, 6.534433717775449045e+02, 7.257952842284018994e+02, + 8.074558443729566627e+02, 8.997734679339701200e+02, 1.004312392957944252e+03, 1.122890361185594877e+03, + 1.257623408459775530e+03, 1.410979202907522234e+03, 1.585840680166573460e+03, 1.785582106601447262e+03, + 2.014160171499825914e+03, 2.276223289283167479e+03, 2.577243010007973485e+03, 2.923672325162804598e+03, + 3.323136759290736047e+03, 3.784665511113575050e+03, 4.318971620160236406e+03, 4.938792274850918489e+03, + 5.659303058273368331e+03, 6.498623292476395004e+03, 7.478433875318933386e+03, 8.624734342286166238e+03, + 9.968772633484590145e+03, 1.154818959559393902e+04, 1.340843110702649390e+04, 1.560449453908580443e+04, + 1.820309391023133793e+04, 2.128535066649680777e+04, 2.495014598048375046e+04, 2.931830770482188047e+04, + 3.453785313845473397e+04, 4.079057084931056631e+04, 4.830030527863206410e+04, 5.734341246586992004e+04, + 6.826199159022146453e+04, 8.148067525594191464e+04, 9.752799507478730867e+04, 1.170636462204808295e+05, + 1.409133795481584143e+05, 1.701137853111825512e+05, 2.059699426710509940e+05, 2.501298539735692463e+05, + 3.046808435555379486e+05, 3.722747886360361411e+05, 4.562913164460176067e+05, 5.610511554921845541e+05, + 6.920959565810343691e+05, 8.565564972181198149e+05, 1.063638800552326000e+06, 1.325268101226286025e+06, + 1.656944841847240121e+06, 2.078886479301160156e+06, 2.617555920130068069e+06, 3.307714852226224955e+06, + 4.195192293202626259e+06, 5.340631300250745566e+06, 6.824578495767020734e+06, 8.754424053248831818e+06, + 1.127390159772263517e+07, 1.457614342739689625e+07, 1.892169326841938100e+07, 2.466345986800667442e+07, + 3.228142821711217588e+07, 4.243114571539869754e+07, 5.601173714434088431e+07, 7.426172509723072112e+07, + 9.889461357830121731e+07, 1.322915875470427182e+08, 1.777766240727455981e+08, 2.400110583389834263e+08, + 3.255621033641982742e+08, 4.437258820593761403e+08, 6.077246218504877165e+08, 8.364565879857375417e+08, + 1.157066594326456169e+09, 1.608740826498742961e+09, 2.248337657948688269e+09, 3.158785978851336228e+09, + 4.461677081363911380e+09, 6.336244831048209270e+09, 9.048130159588677560e+09, 1.299321362309972265e+10, + 1.876478261212947929e+10, 2.725703976712888971e+10, 3.982553459064288940e+10, 5.853727794017415415e+10, + 8.656299089553103385e+10, 1.287959733041898747e+11, 1.928345065430099883e+11, 2.905510467545806044e+11, + 4.406145488098485809e+11, 6.725708918778493152e+11, 1.033486938212196930e+12, 1.598840557086695854e+12, + 2.490490134218272825e+12, 3.906528466724583921e+12, 6.171225147961354244e+12, 9.819163736485109137e+12, + 1.573800106991564475e+13, 2.541245461530031221e+13, 4.134437628407981776e+13, 6.778141973485971528e+13, + 1.119906286595884492e+14, 1.865016806041768967e+14, 3.130890948724989738e+14, 5.298978847669068280e+14, + 9.042973899804181753e+14, 1.556259036818991439e+15, 
2.701230066368200812e+15, 4.729430105054711279e+15, + 8.353779033096586530e+15, 1.488827606293191651e+16, 2.677653466031614956e+16, 4.860434481369499270e+16, + 8.905735519300993312e+16, 1.647413728306871552e+17, 3.077081325673016377e+17, 5.804234101329097680e+17, + 1.105828570628099614e+18, 2.128315358808074026e+18, 4.138651532085235581e+18, 8.132554212123920035e+18, + 1.615146503312570855e+19, 3.242548467260718193e+19, 6.581494581080701321e+19, 1.350831366183090003e+20, + 2.804093832520937396e+20, 5.888113683467563837e+20, 1.250923435312468276e+21, 2.689280279098215635e+21, + 5.851582825664479700e+21, 1.288917231788944660e+22, 2.874582763768997631e+22, 6.492437335109217869e+22, + 1.485286605867082177e+23, 3.442469159113307066e+23, 8.084930196860438207e+23, 1.924506778048094878e+24, + 4.643992662491470729e+24, 1.136281452083591334e+25, 2.819664891060694571e+25, 7.097781559991856367e+25, + 1.812838850127688486e+26, 4.699012851344539124e+26, 1.236419707162832951e+27, 3.303236261210411286e+27, + 8.962558097638891218e+27, 2.470294852986226117e+28, 6.918270960555942883e+28, 1.969189447958411510e+29, + 5.698092609453981289e+29, 1.676626156396922084e+30, 5.017901520171556970e+30, 1.527929892279834489e+31, + 4.734762318366711949e+31, 1.493572546446777040e+32, 4.797441164681908184e+32, 1.569538296400998732e+33, + 5.231651156910242454e+33, 1.777206511525290941e+34, 6.154587299576916134e+34, 2.173469781356604872e+35, + 7.829529896526581616e+35, 2.877935554073076917e+36, 1.079761320923458592e+37, 4.136337730951207042e+37, + 1.618408489711185844e+38, 6.469770640447824771e+38, 2.643413654859316358e+39, 1.104246728308525703e+40, + 4.717842641881260665e+40, 2.062296462389327711e+41, 9.226680005161257219e+41, 4.226544071632731963e+42, + 1.983043729707066518e+43, 9.533448690970155039e+43, 4.697914578740208606e+44, 2.373923101980436574e+45, + 1.230570211868531753e+46, 6.546344338411695147e+46, 3.575371819335804914e+47, 2.005642453538335506e+48, + 1.156055268028903078e+49, 6.849867807870312958e+49, 4.174004815218951121e+50, 2.616872034052857472e+51, + 1.688750346837297725e+52, 1.122275666009684101e+53, 7.683968740248677071e+53, 5.422849612654278583e+54, + 3.946686701799533415e+55, 2.963543587288132884e+56, 2.297086395798939516e+57, 1.838856414208555761e+58, + 1.521049475711243996e+59, 1.300732291175071112e+60, 1.150559591141716740e+61, 1.053265997373725461e+62, + 9.984114209879020836e+62, 9.805325615938694719e+63, 9.982463564199115995e+64, 1.054102211457911410e+66, + 1.155172684780782463e+67, 1.314571302334116663e+68, 1.554362407685457310e+69, 1.910791206002645077e+70, + 2.443616403890711206e+71, 3.252983822318823232e+72, 4.510600140020139737e+73, 6.518821831001902447e+74, + 9.825834460774267633e+75, 1.545692063622722856e+77, 2.539346088408163253e+78, 4.359763993811836117e+79, + 7.827943627464404744e+80, 1.470896877674301183e+82, 2.894527071420674290e+83, 5.969662541607915492e+84, + 1.291277613981057357e+86, 2.931656535626877923e+87, 6.991353547531463135e+88, 1.752671194525972852e+90, + 4.622450137056020715e+91, 1.283581933169566226e+93, 3.755839001138390788e+94, 1.158991729845978702e+96, + 3.774916315438862678e+97, 1.298844894462381673e+99, 4.725038949943384889e+100, 1.819000031203286740e+102, + 7.416966330876906188e+103, 3.206116996910598204e+105, 1.470588770071975193e+107, 7.164198238238641057e+108, + 3.710397624567077270e+110, 2.044882454279709373e+112, 1.200428778654730225e+114, 7.513744370030172114e+115, + 5.019575746343410636e+117, 3.582726927665698318e+119, 2.734947775877248560e+121, 
2.235283764078944248e+123,
+ 1.958084751118243323e+125, 1.840431913109305657e+127, 1.858143260692831108e+129, 2.017432949655777136e+131,
+ 2.358177615888101494e+133, 2.971092974178603610e+135, 4.039532321435816302e+137, 5.933923069661132195e+139,
+ 9.429263693444953240e+141, 1.622841456932873872e+144, 3.028884476067694180e+146, 6.138356175015339477e+148,
+ 1.352531557191942648e+151, 3.244447362295582945e+153, };
+
+__constant__ double* m_abscissas_double[8] = {
+ m_abscissas_double_1,
+ m_abscissas_double_2,
+ m_abscissas_double_3,
+ m_abscissas_double_4,
+ m_abscissas_double_5,
+ m_abscissas_double_6,
+ m_abscissas_double_7,
+ m_abscissas_double_8,
+};
+
+__constant__ double m_weights_double_1[13] =
+ { 2.703640234162693583e-160, 3.100862940179668765e-58, 5.828334625665462970e-21, 1.628894422402653830e-07,
+ 8.129907377394029252e-03, 2.851214447180802931e-01, 1.228894002317118650e+00, 9.374610761705565881e+00,
+ 6.136846875218162167e+02, 8.367995944653844271e+06, 2.286032371256753845e+17, 9.029964022492184559e+44,
+ 1.637973037681055808e+119, };
+
+__constant__ double m_weights_double_2[12] =
+ { 1.029757744225565290e-96, 5.564174008086804112e-35, 1.534846576427062716e-12, 1.519539651119905182e-04,
+ 7.878691652861874032e-02, 6.288072016384128612e-01, 2.842403831496369386e+00, 5.152309209026500589e+01,
+ 2.554172947873109927e+04, 8.291547503290989754e+10, 6.794911791960761587e+27, 1.108995159102362663e+73, };
+
+__constant__ double m_weights_double_3[25] =
+ { 1.545310485347377408e-124, 4.549745016271158113e-75, 3.781189989988588481e-45, 4.369440793304363176e-27,
+ 3.253896178006708087e-16, 1.057239289288944987e-09, 7.826174663495492476e-06, 1.459783224353939263e-03,
+ 2.972970552567852420e-02, 1.637950661613330541e-01, 4.392303913269138921e-01, 8.744243777287317807e-01,
+ 1.804759465860974506e+00, 4.894937215283148383e+00, 2.036214502429748943e+01, 1.576549789679037479e+02,
+ 3.249553828744194733e+03, 3.335686029489862584e+05, 4.858218914917275532e+08, 5.655171002571584464e+13,
+ 9.084276291356790926e+21, 2.202757570781655071e+35, 1.851176020895552142e+57, 1.873046373612647920e+93,
+ 3.113183070605141140e+152, };
+
+__constant__ double m_weights_double_4[49] =
+ { 2.690380169654157101e-141, 9.388760099830475385e-110, 3.267856956418766261e-85, 4.012903562780032075e-66,
+ 2.794595941054873674e-51, 9.598140333687791635e-40, 8.762766371925782803e-31, 7.896919977115783593e-24,
+ 1.951680620313826776e-18, 2.931867534349928041e-14, 4.976350908135118762e-11, 1.546933241860617074e-08,
+ 1.283189791774752963e-06, 3.809052946018782340e-05, 5.087526585392884730e-04, 3.656819625189471368e-03,
+ 1.627679402690602992e-02, 5.011672130624018967e-02, 1.165913368715250324e-01, 2.201514148384271336e-01,
+ 3.581909054968942386e-01, 5.288599003801643436e-01, 7.422823219366348741e-01, 1.032914080772662205e+00,
+ 1.478415067523268199e+00, 2.242226697017918644e+00, 3.684755742578570582e+00, 6.677326887819023056e+00,
+ 1.358063058433697357e+01, 3.171262375809110066e+01, 8.776338468947827779e+01, 3.006939713363920293e+02,
+ 1.352196150715330628e+03, 8.616353573310419356e+03, 8.591849573350877359e+04, 1.523635814554291966e+06,
+ 5.663834603448267056e+07, 5.450828629396188577e+09, 1.780881993484818221e+12, 2.797112703281894578e+15,
+ 3.300887168363313931e+19, 5.192538272313512016e+24, 2.273085973059979872e+31, 7.124498195222272142e+39,
+ 5.379592741425673874e+50, 4.647296508337283075e+64, 3.395147156494395571e+82, 2.736576372417856435e+105,
+ 6.584825756536212781e+134, };
+
+__constant__ double
m_weights_double_5[98] = + { 1.692276285171240629e-150, 1.180420021590838281e-132, 6.494931071412232065e-117, 4.979673804239645358e-103, + 8.790122245397054202e-91, 5.564311726870413043e-80, 1.867634664877268411e-70, 4.693767384843440310e-62, + 1.197772698674604837e-54, 4.060530886983702887e-48, 2.318268710612758367e-42, 2.748088060676949794e-37, + 8.136086869664039226e-33, 7.081491999860360593e-29, 2.092407629019781417e-25, 2.383020547076997517e-22, + 1.170143938604536054e-19, 2.734857915002515580e-17, 3.319894174569245506e-15, 2.260825106530477104e-13, + 9.244747974241858562e-12, 2.410325858091057071e-10, 4.224928060220423782e-09, 5.217223349652829804e-08, + 4.730110697329046717e-07, 3.265522864288710545e-06, 1.772851678458610971e-05, 7.787346612077215804e-05, + 2.838101678971546354e-04, 8.775026198694109646e-04, 2.347474744139291716e-03, 5.529174974874315725e-03, + 1.164520226280038968e-02, 2.223487842904240574e-02, 3.896253311038730452e-02, 6.334975706136386464e-02, + 9.651712033300261848e-02, 1.390236708907266445e-01, 1.908593745910709887e-01, 2.515965688234414960e-01, + 3.206651646562737595e-01, 3.976974208167367099e-01, 4.828935799767836828e-01, 5.773826389735376677e-01, + 6.835838865575605461e-01, 8.056083579298257627e-01, 9.497742078309479997e-01, 1.125351459431134254e+00, + 1.345711576612114788e+00, 1.630156867495860456e+00, 2.006880650908830857e+00, 2.517828844916874130e+00, + 3.226826819856410846e+00, 4.233461155863004269e+00, 5.697400323487776530e+00, 7.882247346334201378e+00, + 1.123717929435969530e+01, 1.655437952523069781e+01, 2.528458931361129124e+01, 4.019700050163276117e+01, + 6.682515670231120695e+01, 1.168022589948424530e+02, 2.160045684819153702e+02, 4.257255901158116698e+02, + 9.017180693982791021e+02, 2.072151523320542727e+03, 5.222689557952776194e+03, 1.461663959276604441e+04, + 4.606455611513396576e+04, 1.660950339384278845e+05, 6.976630616605097333e+05, 3.484240083705972727e+06, + 2.117385064786894718e+07, 1.607368605379557548e+08, 1.570235957877638143e+09, 2.041619284762317483e+10, + 3.670425964529826371e+11, 9.527196643411724126e+12, 3.749667772735766186e+14, 2.365380223523087981e+16, + 2.546815287226970627e+18, 5.026010591299970789e+20, 1.970775914722195502e+23, 1.682531038342715298e+26, + 3.469062187981719410e+29, 1.942614547946028081e+33, 3.375034694941022784e+37, 2.115298406181711256e+42, + 5.673738540911562268e+47, 7.904099301170483654e+53, 7.121903115084356741e+60, 5.321820777644930491e+68, + 4.370977753639010591e+77, 5.429657931755513797e+87, 1.464602226824232950e+99, 1.292445035662836561e+112, + 5.936633203060705474e+126, 2.402419924621336913e+143, }; + +__constant__ double m_weights_double_6[196] = + { 2.552410363565288863e-155, 7.965872719315690060e-146, 6.586401422963018216e-137, 1.563673437419490296e-128, + 1.149636272392214573e-120, 2.810189759625314580e-113, 2.441446149780773329e-106, 8.026292508555041710e-100, + 1.059034284623927886e-93, 5.927259046205893861e-88, 1.482220909125121967e-82, 1.738946448501809732e-77, + 1.002047910184021813e-72, 2.960929073720769637e-68, 4.671749731809402860e-64, 4.088398674807775827e-60, + 2.056642628601930023e-56, 6.149878578966749305e-53, 1.128142221531950274e-49, 1.307702777646013040e-46, + 9.848757125541659318e-44, 4.946847667192787369e-41, 1.698284656321589089e-38, 4.077947349805764486e-36, + 6.998897321243266048e-34, 8.762183229651405846e-32, 8.156281709801700633e-30, 5.747366069381804213e-28, + 3.117951907317865517e-26, 1.323052992594482858e-24, 4.457166057119926322e-23, 1.208896132634708032e-21, + 
2.674697849739340358e-20, 4.887394807742436672e-19, 7.461632083041868391e-18, 9.622230748739818989e-17, + 1.058884510032627118e-15, 1.003988180288807180e-14, 8.276358838778374127e-14, 5.982281469656734375e-13, + 3.821855766886203088e-12, 2.174279097299082001e-11, 1.109294120074848583e-10, 5.109055596902086022e-10, + 2.137447956882816268e-09, 8.170468538364022161e-09, 2.869308592926374871e-08, 9.305185930419436742e-08, + 2.800231592227134982e-07, 7.855263634214717091e-07, 2.062924236714395731e-06, 5.092224131071637441e-06, + 1.185972357373608535e-05, 2.615333473470835518e-05, 5.479175746096322166e-05, 1.093962713107868416e-04, + 2.087714243290528595e-04, 3.818797556417767457e-04, 6.712796918790164790e-04, 1.136760145626956604e-03, + 1.858775505765622915e-03, 2.941191222579735746e-03, 4.512821350378020080e-03, 6.727293426938802892e-03, + 9.760915371480980900e-03, 1.380842853102550981e-02, 1.907678055354397196e-02, 2.577730275571060412e-02, + 3.411688991056810143e-02, 4.428892397843486143e-02, 5.646473816310556552e-02, 7.078637998740884103e-02, + 8.736131246718460273e-02, 1.062595125372295046e-01, 1.275132133780278017e-01, 1.511193209351630349e-01, + 1.770443400812491404e-01, 2.052314915777496186e-01, 2.356095985715091716e-01, 2.681032744853198083e-01, + 3.026439500331752405e-01, 3.391813282438962329e-01, 3.776949427111484449e-01, 4.182056049753837852e-01, + 4.607866519948383101e-01, 5.055750360563806155e-01, 5.527824318481410262e-01, 6.027066663808878454e-01, + 6.557439076684384801e-01, 7.124021812071310501e-01, 7.733169258916167748e-01, 8.392694625821144443e-01, + 9.112094418201526544e-01, 9.902825786957198607e-01, 1.077865293953107863e+00, 1.175608288920191064e+00, + 1.285491624542001346e+00, 1.409894601042286311e+00, 1.551684711657329886e+00, 1.714331263928885829e+00, + 1.902051053858215699e+00, 2.119995922515087770e+00, 2.374495377438728901e+00, 2.673372087884984440e+00, + 3.026354489757871517e+00, 3.445619726158519068e+00, 3.946512819227006419e+00, 4.548505964859933724e+00, + 5.276487613615791435e+00, 6.162508226184798743e+00, 7.248163842886806184e+00, 8.587878410768473380e+00, + 1.025346434903602082e+01, 1.234051869120733230e+01, 1.497748183201988157e+01, 1.833859935862139637e+01, + 2.266266859437541631e+01, 2.828045768298752298e+01, 3.565528397044830339e+01, 4.544381261232990127e+01, + 5.858833744254070379e+01, 7.645876087681923606e+01, 1.010741758687003802e+02, 1.354538987141142977e+02, + 1.841824059064608872e+02, 2.543337025162468240e+02, 3.570103970895535977e+02, 5.099537256432247190e+02, + 7.420561390174965949e+02, 1.101323941193719451e+03, 1.669232910686306616e+03, 2.587203282090385703e+03, + 4.106608602134535014e+03, 6.685657263550896700e+03, 1.118216368762133982e+04, 1.924811115485038079e+04, + 3.416174865734933127e+04, 6.263882227839496242e+04, 1.189094418952240294e+05, 2.342262528110389793e+05, + 4.798899889628646876e+05, 1.025279649144740527e+06, 2.290428015483177407e+06, 5.365618820221241118e+06, + 1.322172034826883742e+07, 3.438296542047893623e+07, 9.468905314460992170e+07, 2.771843378168242512e+08, + 8.658950437199969679e+08, 2.898779165825890846e+09, 1.044627762990198184e+10, 4.071673625087267154e+10, + 1.725245696783106160e+11, 7.989856904303845909e+11, 4.067537100664303783e+12, 2.290253922913114847e+13, + 1.435560574531699914e+14, 1.008680130601194048e+15, 8.003530334765274913e+15, 7.227937568629809266e+16, + 7.491693576707361828e+17, 8.991671234614216799e+18, 1.261556024888540618e+20, 2.090038400033346091e+21, + 4.132773073376509056e+22, 9.865671928781943336e+23, 
2.877978132616007671e+25, 1.039303004928044064e+27, + 4.710544722984128252e+28, 2.719194692980296464e+30, 2.030608169419634520e+32, 1.994536427964099457e+34, + 2.622806931876485852e+36, 4.705142628855489738e+38, 1.174794916996875010e+41, 4.170574236544843559e+43, + 2.153441953645800917e+46, 1.656794933445123415e+49, 1.948830907651317326e+52, 3.601980393005358786e+55, + 1.077033440153993124e+59, 5.374188883861674378e+62, 4.625267105826449467e+66, 7.111646979020385006e+70, + 2.027996051444846521e+75, 1.116168784120367146e+80, 1.237019821283735086e+85, 2.888108172342166477e+90, + 1.490426937972460544e+96, 1.789306677271856318e+102, 5.276973875344766848e+108, 4.051217867886536330e+115, + 8.611617868168979525e+122, 5.412634353380155695e+130, 1.078756609821147465e+139, 7.344353246966125053e+147, }; + +__constant__ double m_weights_double_7[393] = + { 8.688318611421924613e-158, 6.864317997043424201e-153, 3.829638174036322920e-148, 1.524985558970066863e-143, + 4.379527631402474835e-139, 9.162408388991747001e-135, 1.410086556664696347e-130, 1.611529786006329005e-126, + 1.380269212504431613e-122, 8.938739565456142404e-119, 4.414803004265274778e-115, 1.676831992534574674e-111, + 4.937648515671545377e-108, 1.136068312653058895e-104, 2.057969760853201132e-101, 2.956779836249922681e-98, + 3.393449014375824853e-95, 3.132619285740674842e-92, 2.341677665639346254e-89, 1.426656997926173190e-86, + 7.128825597334931865e-84, 2.939485275517928205e-81, 1.006113300119903410e-78, 2.874969402023240560e-76, + 6.896713338909433222e-74, 1.396405038640012785e-71, 2.398869799873387326e-69, 3.514180228970525006e-67, + 4.411557600438730779e-65, 4.768408435763044172e-63, 4.458287229998440383e-61, 3.621710763086768959e-59, + 2.567373174003034094e-57, 1.594829856885795944e-55, 8.716746897177859412e-54, 4.208424534880021226e-52, + 1.801637343401221381e-50, 6.864432292330768862e-49, 2.336084584516383243e-47, 7.125716658075193173e-46, + 1.954733295862350631e-44, 4.838195020814970471e-43, 1.083903033389729471e-41, 2.204655424309513426e-40, + 4.083431629921110537e-39, 6.907095608064865023e-38, 1.069951518082577963e-36, 1.521972185061747284e-35, + 1.993254198127980161e-34, 2.409552194902670884e-33, 2.695243589253751811e-32, 2.796309045342585624e-31, + 2.697138787161831243e-30, 2.423968619042656074e-29, 2.034233848004972409e-28, 1.597498662808006882e-27, + 1.176341105034547043e-26, 8.138404856556384931e-26, 5.300199402716282910e-25, 3.255367628680633536e-24, + 1.889060856810273071e-23, 1.037502167741821871e-22, 5.402129194695882094e-22, 2.671080147950250592e-21, + 1.256163163817414397e-20, 5.627458451375099018e-20, 2.405110192151924414e-19, 9.820723025892385774e-19, + 3.836610965933493002e-18, 1.435949417965440387e-17, 5.155736116435221852e-17, 1.778106820243535736e-16, + 5.897650538103448384e-16, 1.883545377386949394e-15, 5.799022727889041128e-15, 1.723080101027408120e-14, + 4.946559668895564981e-14, 1.373437058883951037e-13, 3.692057356296675476e-13, 9.618669754374864080e-13, + 2.430904641718059201e-12, 5.965319652795549281e-12, 1.422677541958913512e-11, 3.300412010407028696e-11, + 7.453993539444124847e-11, 1.640317480539372495e-10, 3.519919455549922227e-10, 7.371241496931924727e-10, + 1.507573517782825692e-09, 3.013444008176544118e-09, 5.891170930525923854e-09, 1.127175867596519203e-08, + 2.112135943063526334e-08, 3.878572405868819131e-08, 6.984140168311147329e-08, 1.233979234102365865e-07, + 2.140481233406505212e-07, 3.647293211756793211e-07, 6.108366265875129839e-07, 1.006020283089617901e-06, + 1.630199379920459998e-06, 
2.600430208375972125e-06, 4.085372746054298735e-06, 6.324194831966406940e-06, + 9.650830226718535837e-06, 1.452455211307694488e-05, 2.156782506321975658e-05, 3.161234361554654466e-05, + 4.575404320696170555e-05, 6.541767069965264068e-05, 9.243122234114186712e-05, 1.291101968446571125e-04, + 1.783511762821284409e-04, 2.437337497712608884e-04, 3.296292528289701234e-04, 4.413142327104518440e-04, + 5.850859955683163216e-04, 7.683770763700705263e-04, 9.998650298180469208e-04, 1.289573601590465490e-03, + 1.648961132392222413e-03, 2.090991995585424661e-03, 2.630186988492201910e-03, 3.282648895332118799e-03, + 4.066059914467245175e-03, 4.999648283080481820e-03, 6.104122218554241819e-03, 7.401570199659662364e-03, + 8.915327597805008451e-03, 1.066981070009509413e-02, 1.269032020049755525e-02, 1.500281723149735994e-02, + 1.763367592672867332e-02, 2.060941730962251417e-02, 2.395642996410886880e-02, 2.770068343772389725e-02, + 3.186744063963193757e-02, 3.648097561865623097e-02, 4.156430303997019336e-02, 4.713892543167989540e-02, + 5.322460385886412684e-02, 5.983915712308283792e-02, 6.699829390463281224e-02, 7.471548149065050122e-02, + 8.300185389391494996e-02, 9.186616129460712899e-02, 1.013147618591979452e-01, 1.113516561340355690e-01, + 1.219785634003157786e-01, 1.331950386328042665e-01, 1.449986280439946752e-01, 1.573850606313672716e-01, + 1.703484726870446791e-01, 1.838816618814874884e-01, 1.979763672973498048e-01, 2.126235716643688402e-01, + 2.278138220265254991e-01, 2.435375651517067386e-01, 2.597854941629632707e-01, 2.765489031191654411e-01, + 2.938200465906351752e-01, 3.115925016510994851e-01, 3.298615301301230823e-01, 3.486244394295739435e-01, + 3.678809406939879716e-01, 3.876335036292959599e-01, 4.078877077798518471e-01, 4.286525905940105684e-01, + 4.499409931290513174e-01, 4.717699047639316286e-01, 4.941608088016098926e-01, 5.171400313514193966e-01, + 5.407390963876342256e-01, 5.649950903858123945e-01, 5.899510404480374918e-01, 6.156563103475134535e-01, + 6.421670194591982411e-01, 6.695464901047961714e-01, 6.978657294374126896e-01, 7.272039526349696447e-01, + 7.576491548751669105e-01, 7.892987403432202489e-01, 8.222602173936578230e-01, 8.566519699682320391e-01, + 8.926041164852169437e-01, 9.302594686857616145e-01, 9.697746043788558519e-01, 1.011321069700320644e+00, + 1.055086728430498711e+00, 1.101277278143300224e+00, 1.150117955536247302e+00, 1.201855456275760449e+00, + 1.256760098152647779e+00, 1.315128260359919236e+00, 1.377285136373095709e+00, 1.443587843343442141e+00, + 1.514428937238563465e+00, 1.590240390338335337e+00, 1.671498096302065311e+00, 1.758726978084942299e+00, + 1.852506785760205887e+00, 1.953478685110838140e+00, 2.062352754065132708e+00, 2.179916523112736371e+00, + 2.307044718290330681e+00, 2.444710391817196957e+00, 2.593997656772008968e+00, 2.756116279277535182e+00, + 2.932418425642610903e+00, 3.124417914187536020e+00, 3.333812383735923205e+00, 3.562508865047068391e+00, + 3.812653330296280988e+00, 4.086664902155689132e+00, 4.387275531849634155e+00, 4.717576109385405085e+00, + 5.081070154695596855e+00, 5.481736462718817995e+00, 5.924102347216244340e+00, 6.413329458204850426e+00, + 6.955314549766230740e+00, 7.556808065486941215e+00, 8.225554008952760095e+00, 8.970455302965185036e+00, + 9.801769746699598466e+00, 1.073134279679936208e+01, 1.177288477943655549e+01, 1.294230185297226511e+01, + 1.425809217068106541e+01, 1.574182134943112610e+01, 1.741869467329444792e+01, 1.931824763074534781e+01, + 2.147518163232618457e+01, 2.393037838236259586e+01, 2.673213477270754163e+01, 
2.993767083537830673e+01, + 3.361497689655818107e+01, 3.784508348524495401e+01, 4.272485990900652026e+01, 4.837047622725585887e+01, + 5.492170063250241752e+01, 6.254725265973777743e+01, 7.145149574983117631e+01, 8.188283528217430591e+01, + 9.414429671899321190e+01, 1.086069017070108772e+02, 1.257266497442910506e+02, 1.460661655727672308e+02, + 1.703224100743601641e+02, 1.993623058409479084e+02, 2.342687403011957198e+02, 2.764002385528330658e+02, + 3.274687277481591846e+02, 3.896413615832930151e+02, 4.656745019682919178e+02, 5.590908996105107215e+02, + 6.744152109571297875e+02, 8.174887172033244140e+02, 9.958921680864290197e+02, 1.219517071629880108e+03, + 1.501341972869855447e+03, 1.858493492282554856e+03, 2.313705362529768409e+03, 2.897337235279879262e+03, + 3.650185874628374320e+03, 4.627425468074182920e+03, 5.904167858279871204e+03, 7.583363128219763259e+03, + 9.807105719965428472e+03, 1.277293273832114230e+04, 1.675749596877978193e+04, 2.215121038263169759e+04, + 2.950937349291504490e+04, 3.962820433513419525e+04, 5.365890489878942635e+04, 7.328024305737981431e+04, + 1.009620167752942516e+05, 1.403709568321740997e+05, 1.970019955923188504e+05, 2.791695960502382133e+05, + 3.995801250202947693e+05, 5.778515877588312220e+05, 8.445944401474017243e+05, 1.248092975135001687e+06, + 1.865367859966950385e+06, 2.820705292493674480e+06, 4.317063433830483499e+06, 6.689961127164684387e+06, + 1.050111601631327499e+07, 1.670327884792325766e+07, 2.693430470211696200e+07, 4.404906898054894166e+07, + 7.309535640536363311e+07, 1.231306812701882145e+08, 2.106560568719367745e+08, 3.662073971851359192e+08, + 6.472124787519330196e+08, 1.163486593592585616e+09, 2.128658395254150452e+09, 3.965732938755983605e+09, + 7.527735928223242836e+09, 1.456757162128879538e+10, 2.875798636941021041e+10, 5.794999654160054887e+10, + 1.192767536774485257e+11, 2.509334090779650360e+11, 5.399624414800303207e+11, 1.189276111740286910e+12, + 2.683103883355551677e+12, 6.205255919751506427e+12, 1.472284072112162717e+13, 3.586628373992547853e+13, + 8.978594107356889337e+13, 2.311710197091641250e+14, 6.127020712804348908e+14, 1.673232679378485978e+15, + 4.712671499032329365e+15, 1.370275025680988289e+16, 4.117347054027612886e+16, 1.279822436878842710e+17, + 4.119762767831332886e+17, 1.374888606936629814e+18, 4.762483833659790733e+18, 1.714288404980390540e+19, + 6.420200704842635702e+19, 2.504808062315322558e+20, 1.019355251138167687e+21, 4.332952958521756932e+21, + 1.926416464889827426e+22, 8.971059571108856501e+22, 4.382317748928748816e+23, 2.249003059943548727e+24, + 1.214458587662725100e+25, 6.911683912813140938e+25, 4.152578123301633020e+26, 2.638346388179288086e+27, + 1.775811490887700718e+28, 1.268552401544524965e+29, 9.635786341213661742e+29, 7.797939379813000783e+30, + 6.736900087983560033e+31, 6.226288752443836475e+32, 6.169035287163451891e+33, 6.567250104576983172e+34, + 7.528666735185428595e+35, 9.316271421365627344e+36, 1.247410737003664698e+38, 1.811787648043939987e+39, + 2.861918583157116420e+40, 4.929657099622567574e+41, 9.284951278562156071e+42, 1.917687997037326435e+44, + 4.355948096683946408e+45, 1.091453486585817118e+47, 3.026206402784023251e+48, 9.314478983991942688e+49, + 3.193195693823940775e+51, 1.223447678968662613e+53, 5.257403184148516426e+54, 2.543108925126136766e+56, + 1.389947584026783879e+58, 8.616987336205957549e+59, 6.083777056769299984e+61, 4.911841077800001710e+63, + 4.554259483169784661e+65, 4.870815185962582259e+67, 6.036211886847067841e+69, 8.708377755587698026e+71, + 1.469655296381977267e+74, 
2.915822924489215887e+76, 6.836044306573246016e+78, 1.903917300559946782e+81, + 6.333813341980360028e+83, 2.531082268773868753e+86, 1.222077360592898816e+89, 7.172167453276776330e+91, + 5.148160232410244898e+94, 4.548619807672339638e+97, 4.979632843475864923e+100, 6.800802744782331957e+103, + 1.166855497965918386e+107, 2.533457765534279043e+110, 7.012864641215147208e+113, 2.494083354169569414e+117, + 1.148722178881219993e+121, 6.908313932158993510e+124, 5.470912484744367184e+128, 5.755359832684120769e+132, + 8.115681923907451939e+136, 1.548304780334447081e+141, 4.034912159113614601e+145, 1.450632759611715526e+150, + 7.268799665580789770e+154, }; + +__constant__ double m_weights_double_8[786] = + { 4.901759085947701448e-159, 1.505832423620814399e-156, 4.231872109262999523e-154, 1.089479701785106001e-151, + 2.572922387150651649e-149, 5.581311054334156941e-147, 1.113575900126970040e-144, 2.046165051332286084e-142, + 3.466994885004770636e-140, 5.423795404073501922e-138, 7.843833272402847010e-136, 1.049922957933194415e-133, + 1.302301071957418603e-131, 1.498659737828393008e-129, 1.601906622414286282e-127, 1.592248618401983561e-125, + 1.473375345916436274e-123, 1.270651551394009593e-121, 1.022408263525766209e-119, 7.683762602329562781e-118, + 5.399268127233373186e-116, 3.551074274853494676e-114, 2.188235409519121010e-112, 1.264667515430816934e-110, + 6.861807566737243712e-109, 3.498691686825209963e-107, 1.678016807398375157e-105, 7.577439431441931490e-104, + 3.224703770159386809e-102, 1.294487090677705963e-100, 4.906133250963454139e-99, 1.757121317988153326e-97, + 5.952042491454320383e-96, 1.908566653286417264e-94, 5.798224459236429212e-93, 1.670293239978334727e-91, + 4.566236673398083038e-90, 1.185617342791547945e-88, 2.926160027801296929e-87, 6.870061134126707137e-86, + 1.535565783500379945e-84, 3.270036736778401257e-83, 6.639558007206580362e-82, 1.286319750967398593e-80, + 2.379566581139022958e-79, 4.206268231398883425e-78, 7.109719237833379433e-77, 1.149915104115372777e-75, + 1.780876201255594220e-74, 2.642703796179329883e-73, 3.760085375941719327e-72, 5.132920951124251993e-71, + 6.727100274601427696e-70, 8.469585621347697498e-69, 1.025032382672232848e-67, 1.193219127557863348e-66, + 1.336816930381306582e-65, 1.442283479679798385e-64, 1.499374555004793991e-63, 1.502797203133501438e-62, + 1.453005969318485303e-61, 1.355980448377862540e-60, 1.222072412212552127e-59, 1.064223180270520159e-58, + 8.959667396075636845e-58, 7.296288808079294105e-57, 5.750255296190181158e-56, 4.388011664829013518e-55, + 3.243852451291832398e-54, 2.324239357665538806e-53, 1.614869776203026446e-52, 1.088524605545274842e-51, + 7.121755574192829045e-51, 4.524647662549067074e-50, 2.792730715818793035e-49, 1.675384879603864227e-48, + 9.773114328777676091e-48, 5.545910766847627082e-47, 3.062809705627873645e-46, 1.646862118038266234e-45, + 8.625108513887155847e-45, 4.401687663868890701e-44, 2.189755778847646746e-43, 1.062345336449265889e-42, + 5.028036663485684049e-42, 2.322524635717249223e-41, 1.047406593898341306e-40, 4.613438388449698168e-40, + 1.985397445118162005e-39, 8.351027367454628343e-39, 3.434440903484543389e-38, 1.381489131877196646e-37, + 5.437051201310225224e-37, 2.094357548080647717e-36, 7.898676618592006902e-36, 2.917536870947471272e-35, + 1.055788886022716597e-34, 3.744333812160330812e-34, 1.301801185251957290e-33, 4.438346216893387768e-33, + 1.484348268951816542e-32, 4.871001129849836971e-32, 1.568903000742513942e-31, 4.961295315917935235e-31, + 1.540773910027990821e-30, 4.700558022172014910e-30, 
1.409115230718949596e-29, 4.151913103955692034e-29, + 1.202737613715427748e-28, 3.426327374934496736e-28, 9.601405359397026012e-28, 2.647278642033773301e-27, + 7.183442220565147103e-27, 1.918850545981494042e-26, 5.046974779455992494e-26, 1.307394799925911700e-25, + 3.336342198236957082e-25, 8.389259581136262194e-25, 2.079051813513548608e-24, 5.079178967243765280e-24, + 1.223501794357837278e-23, 2.906654911057549530e-23, 6.811668606095015470e-23, 1.574985938238025303e-22, + 3.593796788969348326e-22, 8.094185411205212564e-22, 1.799796183237481721e-21, 3.951758901641017285e-21, + 8.569580068050865775e-21, 1.835753486517298696e-20, 3.885414339966022317e-20, 8.126613972895021790e-20, + 1.680007182889503141e-19, 3.433369351563962828e-19, 6.937695550399427499e-19, 1.386345631008981755e-18, + 2.740087497759230881e-18, 5.357570288683386626e-18, 1.036464933022803784e-17, 1.984249442010084992e-17, + 3.759788006060003409e-17, 7.052211261821684795e-17, 1.309635641529546221e-16, 2.408275496109180528e-16, + 4.385898809611711552e-16, 7.911758686849121285e-16, 1.413883597877183873e-15, 2.503477536644680210e-15, + 4.392637866550705827e-15, 7.638710306960574612e-15, 1.316703360377476041e-14, 2.250031027275448919e-14, + 3.812239733412214953e-14, 6.405021660191363479e-14, 1.067250538270319484e-13, 1.763897493784721010e-13, + 2.891987565334547756e-13, 4.704242520369958085e-13, 7.592878273512691990e-13, 1.216183338372525172e-12, + 1.933388593436624879e-12, 3.050826852442290751e-12, 4.779080020017636657e-12, 7.432734713385425098e-12, + 1.147833888125873666e-11, 1.760286160372422754e-11, 2.681071101623953168e-11, 4.056023754295965437e-11, + 6.095443492241537222e-11, 9.100550129616064211e-11, 1.349993452136967652e-10, 1.989943912395156051e-10, + 2.914996073619059788e-10, 4.243900781412219621e-10, 6.141353162671391082e-10, 8.834365795894798511e-10, + 1.263395594025933170e-09, 1.796369250051716047e-09, 2.539704143326480862e-09, 3.570592498287890499e-09, + 4.992348403150539107e-09, 6.942471870489931483e-09, 9.602949600164561371e-09, 1.321333712761666777e-08, + 1.808727901635346390e-08, 2.463325364767791516e-08, 3.338047870136870496e-08, 4.501108426108505069e-08, + 6.039985413333259594e-08, 8.066305374526097834e-08, 1.072181059018892614e-07, 1.418561443795353991e-07, + 1.868297699836383305e-07, 2.449586539172972009e-07, 3.197559780442760832e-07, 4.155790690867544334e-07, + 5.378079713325544678e-07, 6.930561064776686194e-07, 8.894175852502122454e-07, 1.136756157868726006e-06, + 1.447041212534730898e-06, 1.834736645332833504e-06, 2.317248822354253644e-06, 2.915440225825303911e-06, + 3.654215709863551870e-06, 4.563188576773760151e-06, 5.677433909482232878e-06, 7.038336747307571784e-06, + 8.694542758083067228e-06, 1.070301902702759858e-05, 1.313023243937403750e-05, 1.605345286789073897e-05, + 1.956218797728780449e-05, 2.375975591555218862e-05, 2.876500146954361208e-05, 3.471416041263076209e-05, + 4.176287576185915239e-05, 5.008836848967403773e-05, 5.989176390181730373e-05, 7.140057340280213227e-05, + 8.487132973049760036e-05, 1.005923719620999934e-04, 1.188867746885496973e-04, 1.401154137398069279e-04, + 1.646801587388731249e-04, 1.930271805904271778e-04, 2.256503597954330556e-04, 2.630947792533707128e-04, + 3.059602829980946180e-04, 3.549050801425155303e-04, 4.106493712131842727e-04, 4.739789720708565436e-04, + 5.457489087697051069e-04, 6.268869550379884668e-04, 7.183970825975973673e-04, 8.213627933082928901e-04, + 9.369503011517966364e-04, 1.066411531385725184e-03, 1.211086903819095417e-03, 1.372407867107646339e-03, + 
1.551899151252505624e-03, 1.751180706119547318e-03, 1.971969294784470944e-03, 2.216079711850908971e-03, + 2.485425598581779636e-03, 2.782019828718993257e-03, 3.107974441230220176e-03, 3.465500098895993776e-03, + 3.856905054613959619e-03, 4.284593610523639393e-03, 4.751064058515097225e-03, 5.258906094345618421e-03, + 5.810797701414435799e-03, 6.409501504198915943e-03, 7.057860595396970186e-03, 7.758793844909123446e-03, + 8.515290702888369372e-03, 9.330405513145299523e-03, 1.020725135717912572e-02, 1.114899345297222760e-02, + 1.215884213639836574e-02, 1.324004545661629463e-02, 1.439588142011718850e-02, 1.562964992113485073e-02, + 1.694466439888404584e-02, 1.834424326453982033e-02, 1.983170114298836870e-02, 2.141033997615067889e-02, + 2.308344003609062690e-02, 2.485425089716015368e-02, 2.672598241710042669e-02, 2.870179577730820310e-02, + 3.078479463239356953e-02, 3.297801641870515720e-02, 3.528442387069167064e-02, 3.770689679281728890e-02, + 4.024822413326941635e-02, 4.291109640390936770e-02, 4.569809848884132640e-02, 4.861170288163592155e-02, + 5.165426338866744454e-02, 5.482800933323496446e-02, 5.813504029216542680e-02, 6.157732139347005467e-02, + 6.515667920037330165e-02, 6.887479820368566403e-02, 7.273321794107712090e-02, 7.673333075835566151e-02, + 8.087638022439339824e-02, 8.516346020789830747e-02, 8.959551462082867423e-02, 9.417333782991444898e-02, + 9.889757573450802477e-02, 1.037687275058577967e-01, 1.087871479799008567e-01, 1.139530506928239996e-01, + 1.192665115459606141e-01, 1.247274730840887416e-01, 1.303357493688843496e-01, 1.360910314271734020e-01, + 1.419928932517243620e-01, 1.480407983306351483e-01, 1.542341066798992024e-01, 1.605720823524863565e-01, + 1.670539013962460335e-01, 1.736786602321317742e-01, 1.804453844236544912e-01, 1.873530378080931153e-01, + 1.944005319598201097e-01, 2.015867359561292115e-01, 2.089104864161762672e-01, 2.163705977840528187e-01, + 2.239658728275971045e-01, 2.316951133252986765e-01, 2.395571309145607347e-01, 2.475507580756380088e-01, + 2.556748592267567912e-01, 2.639283419072366399e-01, 2.723101680268593668e-01, 2.808193651612593497e-01, + 2.894550378747292326e-01, 2.982163790535362503e-01, 3.071026812346166036e-01, 3.161133479163487600e-01, + 3.252479048399920142e-01, 3.345060112323053140e-01, 3.438874710018250777e-01, 3.533922438832718793e-01, + 3.630204565265675291e-01, 3.727724135289699431e-01, 3.826486084108677024e-01, 3.926497345378144818e-01, + 4.027766959934214472e-01, 4.130306184097598756e-01, 4.234128597639539906e-01, 4.339250211516634154e-01, + 4.445689575501645526e-01, 4.553467885857401860e-01, 4.662609093220769612e-01, 4.773140010883521767e-01, + 4.885090423676662636e-01, 4.998493197684479070e-01, 5.113384391034281429e-01, 5.229803366027518117e-01, + 5.347792902897740156e-01, 5.467399315500809553e-01, 5.588672569262846167e-01, 5.711666401731758417e-01, + 5.836438446098876156e-01, 5.963050358078278898e-01, 6.091567946552975691e-01, 6.222061308419237716e-01, + 6.354604968083211637e-01, 6.489278022087558681e-01, 6.626164289370386795e-01, 6.765352467684294227e-01, + 6.906936296730053994e-01, 7.051014728587479919e-01, 7.197692106055475377e-01, 7.347078349544334315e-01, + 7.499289153196209421e-01, 7.654446190944464391e-01, 7.812677333259577661e-01, 7.974116875368567865e-01, + 8.138905777776784362e-01, 8.307191919965581771e-01, 8.479130368187123741e-01, 8.654883658328603475e-01, + 8.834622094872810766e-01, 9.018524067040521621e-01, 9.206776383262963142e-01, 9.399574625199963151e-01, + 9.597123522591707284e-01, 9.799637350309700387e-01, 
1.000734034905599933e+00, 1.022046717124952010e+00, + 1.043926335373472893e+00, 1.066398581905185161e+00, 1.089490340711946628e+00, 1.113229743930062164e+00, + 1.137646231695313314e+00, 1.162770615670420260e+00, 1.188635146483979071e+00, 1.215273585336112390e+00, + 1.242721280043529050e+00, 1.271015245815510799e+00, 1.300194251072644711e+00, 1.330298908642019971e+00, + 1.361371772686240192e+00, 1.393457441749111730e+00, 1.426602668328411758e+00, 1.460856475415888358e+00, + 1.496270280476785338e+00, 1.532898027375920169e+00, 1.570796326794896619e+00, 1.610024605725646420e+00, + 1.650645266669431435e+00, 1.692723857217988332e+00, 1.736329250744977731e+00, 1.781533838991654903e+00, + 1.828413737391087381e+00, 1.877049004040720448e+00, 1.927523873304087635e+00, 1.979927005099477087e+00, + 2.034351751016940433e+00, 2.090896438495766214e+00, 2.149664674393090421e+00, 2.210765669381402212e+00, + 2.274314584729113927e+00, 2.340432903144970240e+00, 2.409248825504827076e+00, 2.480897695429288043e+00, + 2.555522453844001656e+00, 2.633274125832370887e+00, 2.714312342284411608e+00, 2.798805899057066353e+00, + 2.886933356592141886e+00, 2.978883683190077867e+00, 3.074856945413050211e+00, 3.175065049391765683e+00, + 3.279732537139255280e+00, 3.389097442334834102e+00, 3.503412210435275865e+00, 3.622944688401595705e+00, + 3.747979189802462585e+00, 3.878817641573403805e+00, 4.015780819279312670e+00, 4.159209678351536168e+00, + 4.309466789455788368e+00, 4.466937886899736897e+00, 4.632033539816493591e+00, 4.805190956770360727e+00, + 4.986875935432896972e+00, 5.177584970080537688e+00, 5.377847530880629761e+00, 5.588228530273088035e+00, + 5.809330993233640059e+00, 6.041798949837089488e+00, 6.286320570342285919e+00, 6.543631565013652661e+00, + 6.814518873098582608e+00, 7.099824667819718682e+00, 7.400450706942931008e+00, 7.717363061475788814e+00, + 8.051597258371279584e+00, 8.404263876795383951e+00, 8.776554641607500109e+00, 9.169749062247565207e+00, + 9.585221670276993889e+00, 1.002444991444300704e+01, 1.048902277839603856e+01, 1.098065019316492606e+01, + 1.150117332427169985e+01, 1.205257582204547280e+01, 1.263699613338454324e+01, 1.325674098404332380e+01, + 1.391430015262873368e+01, 1.461236267104086712e+01, 1.535383460126837531e+01, 1.614185855545811846e+01, + 1.697983514525758524e+01, 1.787144656784601339e+01, 1.882068256013178484e+01, 1.983186897964764985e+01, + 2.090969930111845450e+01, 2.205926935196095527e+01, 2.328611564861881683e+01, 2.459625773922860138e+01, + 2.599624500732998276e+01, 2.749320844694889238e+01, 2.909491798228195984e+01, 3.080984597641076715e+01, + 3.264723765414180400e+01, 3.461718925554321861e+01, 3.673073484057443067e+01, 3.899994278315456980e+01, + 4.143802312713618427e+01, 4.405944712930142330e+01, 4.688008048840357439e+01, 4.991733195758662298e+01, + 5.319031926387298369e+01, 5.672005451703465811e+01, 6.052965158594831140e+01, 6.464455825915836491e+01, + 6.909281639443131774e+01, 7.390535370725211687e+01, 7.911631135942343489e+01, 8.476341209659472308e+01, + 9.088837435982152722e+01, 9.753737857533253823e+01, 1.047615927251647361e+02, 1.126177653386554197e+02, + 1.211688952437418817e+02, 1.304849888043593828e+02, 1.406439169773708701e+02, 1.517323863863765989e+02, + 1.638470407739824279e+02, 1.770957117100033620e+02, 1.915988403612775885e+02, 2.074910955409497265e+02, + 2.249232172361061194e+02, 2.440641194630869936e+02, 2.651032917390266964e+02, 2.882535448280364212e+02, + 3.137541538897424513e+02, 3.418744609277612322e+02, 3.729180087461214321e+02, 4.072272907593818790e+02, + 
4.451892153103389878e+02, 4.872414000388630927e+02, 5.338794318098249932e+02, 5.856652513400113117e+02, + 6.432368496766822816e+02, 7.073194969336578611e+02, 7.787387632221277236e+02, 8.584356387770406827e+02, + 9.474841163944599543e+02, 1.047111666301969297e+03, 1.158723113719277435e+03, 1.283928525349707755e+03, + 1.424575826189363437e+03, 1.582789006393775706e+03, 1.761012944445459235e+03, 1.962066073573121788e+03, + 2.189202360708354222e+03, 2.446184360349559652e+03, 2.737369460761187093e+03, 3.067811870808767638e+03, + 3.443383419509962754e+03, 3.870916878218207705e+03, 4.358376293464465508e+03, 4.915059769420260559e+03, + 5.551841303216967404e+03, 6.281459704453426129e+03, 7.118864385205665710e+03, 8.081629967627799596e+03, + 9.190454321738597280e+03, 1.046975794051835702e+04, 1.194840663946247320e+04, 1.366058463062104793e+04, + 1.564685131637809273e+04, 1.795542299179967539e+04, 2.064373043744082514e+04, 2.378031563732670807e+04, + 2.744714621995650953e+04, 3.174244552480722739e+04, 3.678416050731336226e+04, 4.271422037773508051e+04, + 4.970377768100323981e+04, 5.795967273138576164e+04, 6.773242484608792593e+04, 7.932613346949942761e+04, + 9.311077397156915450e+04, 1.095375030536372224e+05, 1.291577556735669526e+05, 1.526471301608741586e+05, + 1.808353350969648289e+05, 2.147438294770164181e+05, 2.556332515573999948e+05, 3.050633345562097502e+05, + 3.649687926665853954e+05, 4.377556866857485380e+05, 5.264241222943208736e+05, 6.347248990108319410e+05, + 7.673600526542426466e+05, 9.302403050337502786e+05, 1.130816502666451845e+06, 1.378507531155523742e+06, + 1.685254393964162275e+06, 2.066239770168639390e+06, 2.540825270229354918e+06, 3.133775962036416630e+06, + 3.876865148275802393e+06, 4.810984054018349430e+06, 5.988924089534678664e+06, 7.479057929608060924e+06, + 9.370225698693408867e+06, 1.177824230977510661e+07, 1.485459301432580619e+07, 1.879809270383398104e+07, + 2.387057334436346400e+07, 3.041806552258603202e+07, 3.889950046843262151e+07, 4.992574374586696017e+07, + 6.431287504495613210e+07, 8.315518519925858136e+07, 1.079255664704117961e+08, 1.406141073390035115e+08, + 1.839201785677305607e+08, 2.415197116904975365e+08, 3.184386015381112281e+08, 4.215765018929686736e+08, + 5.604446356915114550e+08, 7.482094398046911572e+08, 1.003175129668246151e+09, 1.350898918997482870e+09, + 1.827222165053491590e+09, 2.482633480831760933e+09, 3.388577637234919719e+09, 4.646620065299105644e+09, + 6.401821801566297122e+09, 8.862352038053251473e+09, 1.232838602859196811e+10, 1.723489297480180023e+10, + 2.421530528469447376e+10, 3.419673813208063025e+10, 4.854312364622606540e+10, 6.927149043760342676e+10, + 9.938049490186203616e+10, 1.433521424759854145e+11, 2.079221734483088227e+11, 3.032695241820108158e+11, + 4.448631503727710431e+11, 6.563458646477901051e+11, 9.740635696398910980e+11, 1.454220520059656158e+12, + 2.184250688898627320e+12, 3.300999104757560757e+12, 5.019970485022749012e+12, 7.682676299017607834e+12, + 1.183376596003983872e+13, 1.834748853557035315e+13, 2.863639312458363586e+13, 4.499803892715039958e+13, + 7.119486876989154498e+13, 1.134307017980122346e+14, 1.820065782363618395e+14, 2.941484500615394037e+14, + 4.788707305890930382e+14, 7.854025036928623551e+14, 1.297894304619860251e+15, 2.161279954782425640e+15, + 3.627102147035003834e+15, 6.135342933440950378e+15, 1.046170006362244506e+16, 1.798477357839665686e+16, + 3.117473412332331475e+16, 5.449445073049184222e+16, 9.607515505017978212e+16, 1.708589224452677852e+17, + 3.065429751110228665e+17, 5.549227437451149511e+17, 
1.013730232778046314e+18, 1.869059895876405824e+18, + 3.478549552381578424e+18, 6.535992245975463763e+18, 1.240019272261066308e+19, 2.375828866910936629e+19, + 4.597682433604432625e+19, 8.988106816837128428e+19, 1.775302379393632263e+20, 3.543413304390973486e+20, + 7.148061397675525327e+20, 1.457620510577186305e+21, 3.005137124879829797e+21, 6.265024861633250697e+21, + 1.320979941090283816e+22, 2.817487535902146221e+22, 6.079933041429805231e+22, 1.327658853647212083e+23, + 2.934311759183641318e+23, 6.565087216807130026e+23, 1.487212273437937650e+24, 3.411840196076788128e+24, + 7.928189928797018762e+24, 1.866451877029704857e+25, 4.452521859886739549e+25, 1.076545435174977662e+26, + 2.638685681190697586e+26, 6.557908470244186498e+26, 1.652952243735585721e+27, 4.226383395914916199e+27, + 1.096450394268080148e+28, 2.886822082999286080e+28, 7.715480389344015925e+28, 2.093728789309964846e+29, + 5.770275789447655037e+29, 1.615463845391781140e+30, 4.595470055795608691e+30, 1.328629392686523255e+31, + 3.905079681530784219e+31, 1.167134024271997252e+32, 3.548058538654277403e+32, 1.097378059358046160e+33, + 3.454102978064445595e+33, 1.106745393701652323e+34, 3.610899559139069994e+34, 1.199946999283670567e+35, + 4.062687014190878792e+35, 1.401835223893224514e+36, 4.931085527333162173e+36, 1.768812393284919500e+37, + 6.472148293945199961e+37, 2.416453721739211922e+38, 9.208944720398123862e+38, 3.583297028622126676e+39, + 1.424097482596699440e+40, 5.782627833426411524e+40, 2.399862204084363183e+41, 1.018291572042305460e+42, + 4.419105414822034531e+42, 1.962126117680499311e+43, 8.916742424061253707e+43, 4.148882478294757720e+44, + 1.977256529558276930e+45, 9.655300233875401080e+45, 4.832878898335598922e+46, 2.480575878223098058e+47, + 1.306102809757654706e+48, 7.057565717289569232e+48, 3.915276522229618618e+49, 2.230898980943393318e+50, + 1.306141334496309306e+51, 7.861021286656392627e+51, 4.865583758538451107e+52, 3.098487425915704674e+53, + 2.031037614862563901e+54, 1.370999647608260200e+55, 9.534736274325001528e+55, 6.834959923166415407e+56, + 5.052733546324789020e+57, 3.853810997282159979e+58, 3.034183107853208298e+59, 2.467161926009838899e+60, + 2.072901039813580593e+61, 1.800563980579615383e+62, 1.617764027895344257e+63, 1.504283028250688329e+64, + 1.448393206525427172e+65, 1.444855510980115799e+66, 1.494120428855029243e+67, 1.602566566107015722e+68, + 1.783880504153942988e+69, 2.061999240572760738e+70, 2.476521794698572715e+71, 3.092349914153497358e+72, + 4.016927238305985810e+73, 5.431607545226497387e+74, 7.650086824042822759e+75, 1.123017984114349288e+77, + 1.719382952966052004e+78, 2.747335718690686674e+79, 4.584545010557684123e+80, 7.995082041539250252e+81, + 1.458119909365899044e+83, 2.783001178679600175e+84, 5.562812231966194628e+85, 1.165338768982404578e+87, + 2.560399126432838224e+88, 5.904549641859098192e+89, 1.430278474749838710e+91, 3.642046122956932563e+92, + 9.756698571206402300e+93, 2.751946044275883051e+95, 8.179164793643197279e+96, 2.563704735086825890e+98, + 8.481656496128255880e+99, 2.964260254403981007e+101, 1.095342970031208886e+103, 4.283148547584870628e+104, + 1.773954352944319744e+106, 7.788991081894224760e+107, 3.628931721056821352e+109, 1.795729272516020592e+111, + 9.446685151482835339e+112, 5.288263179614488101e+114, 3.153311236741401362e+116, 2.004807079683827669e+118, + 1.360407192665237716e+120, 9.862825609807810517e+121, 7.647551788591128099e+123, 6.348802224871730088e+125, + 5.649062361980019098e+127, 5.393248003523784781e+129, 5.530897191915703916e+131, 
6.099598644640894333e+133,
+      7.242098433491964504e+135, 9.268083053637375570e+137, 1.279942702416040582e+140, 1.909796626960621302e+142,
+      3.082540300669885040e+144, 5.388809732384179657e+146, 1.021610251056626535e+149, 2.103005440072790650e+151,
+      4.706753990348725570e+153, 1.146834128125248991e+156, };
+
+__constant__ double* m_weights_double[8] = {
+    m_weights_double_1,
+    m_weights_double_2,
+    m_weights_double_3,
+    m_weights_double_4,
+    m_weights_double_5,
+    m_weights_double_6,
+    m_weights_double_7,
+    m_weights_double_8
+};
+
+__constant__ boost::math::size_t float_coefficients_size[8] = {9, 8, 16, 33, 66, 132, 263, 527};
+
+__constant__ boost::math::size_t double_coefficients_size[8] = {13, 12, 25, 49, 98, 196, 393, 786};
+
+template <typename T>
+struct coefficients_selector;
+
+template <>
+struct coefficients_selector<float>
+{
+    __device__ static const auto abscissas() { return m_abscissas_float; }
+    __device__ static const auto weights() { return m_weights_float; }
+    __device__ static const auto size() { return float_coefficients_size; }
+};
+
+template <>
+struct coefficients_selector<double>
+{
+    __device__ static const auto abscissas() { return m_abscissas_double; }
+    __device__ static const auto weights() { return m_weights_double; }
+    __device__ static const auto size() { return double_coefficients_size; }
+};
+
+
+template <class F, class Real, class Policy = boost::math::policies::policy<> >
+__device__ auto exp_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
+{
+    using K = decltype(f(static_cast<Real>(0)));
+    using boost::math::constants::half;
+    using boost::math::constants::half_pi;
+
+    // This provided a nice error message for real valued integrals, but it's super awkward for complex-valued integrals:
+    /*K y_max = f(tools::max_value<Real>());
+    if(abs(y_max) > tools::epsilon<Real>() || !(boost::math::isfinite)(y_max))
+    {
+        K val = abs(y_max);
+        return static_cast<K>(policies::raise_domain_error(function, "The function you are trying to integrate does not go to zero at infinity, and instead evaluates to %1%", val, Policy()));
+    }*/
+
+    //std::cout << std::setprecision(5*std::numeric_limits<Real>::digits10);
+
+    // Get the party started with two estimates of the integral:
+    const auto m_abscissas = coefficients_selector<Real>::abscissas();
+    const auto m_weights = coefficients_selector<Real>::weights();
+    const auto m_size = coefficients_selector<Real>::size();
+
+    Real min_abscissa{ 0 }, max_abscissa{ boost::math::tools::max_value<Real>() };
+    K I0 = 0;
+    Real L1_I0 = 0;
+    for(boost::math::size_t i = 0; i < m_size[0]; ++i)
+    {
+        K y = f(m_abscissas[0][i]);
+        K I0_last = I0;
+        I0 += y*m_weights[0][i];
+        L1_I0 += abs(y)*m_weights[0][i];
+        if ((I0_last == I0) && (abs(I0) != 0))
+        {
+            max_abscissa = m_abscissas[0][i];
+            break;
+        }
+    }
+
+    //std::cout << "First estimate : " << I0 << std::endl;
+    K I1 = I0;
+    Real L1_I1 = L1_I0;
+    bool have_first_j = false;
+    boost::math::size_t first_j = 0;
+    for (boost::math::size_t i = 0; (i < m_size[1]) && (m_abscissas[1][i] < max_abscissa); ++i)
+    {
+        K y = f(m_abscissas[1][i]);
+        K I1_last = I1;
+        I1 += y*m_weights[1][i];
+        L1_I1 += abs(y)*m_weights[1][i];
+        if (!have_first_j && (I1_last == I1))
+        {
+            // No change to the sum, disregard these values on the LHS:
+            if ((i < m_size[1] - 1) && (m_abscissas[1][i + 1] > max_abscissa))
+            {
+                // The summit is so high, that we found nothing in this row which added to the integral!!
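+                // Since no point in this row changed the sum before we ran past max_abscissa,
+                // leave min_abscissa and first_j untouched rather than trimming away the whole row: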
+                have_first_j = true;
+            }
+            else
+            {
+                min_abscissa = m_abscissas[1][i];
+                first_j = i;
+            }
+        }
+        else
+        {
+            have_first_j = true;
+        }
+    }
+
+    if (I0 == static_cast<K>(0))
+    {
+        // We failed to find anything: is the integral zero, or have we just not found it yet?
+        // We'll try one more level; if that still finds nothing then it'll terminate.
+        min_abscissa = 0;
+        max_abscissa = boost::math::tools::max_value<Real>();
+    }
+
+    I1 *= half<Real>();
+    L1_I1 *= half<Real>();
+    Real err = abs(I0 - I1);
+    //std::cout << "Second estimate: " << I1 << " Error estimate at level " << 1 << " = " << err << std::endl;
+
+    boost::math::size_t i = 2;
+    for(; i < 8U; ++i) // Magic number 8 is the number of precomputed levels
+    {
+        I0 = I1;
+        L1_I0 = L1_I1;
+
+        I1 = half<Real>()*I0;
+        L1_I1 = half<Real>()*L1_I0;
+        Real h = static_cast<Real>(1)/static_cast<Real>(1 << i);
+        K sum = 0;
+        Real absum = 0;
+
+        auto& abscissas_row = m_abscissas[i];
+        auto& weight_row = m_weights[i];
+
+        // approximate location to start looking for the lowest meaningful abscissa value
+        first_j = first_j == 0 ? 0 : 2 * first_j - 1;
+
+        boost::math::size_t j = first_j;
+        while (abscissas_row[j] < min_abscissa)
+        {
+            ++j;
+        }
+
+        for(; (j < m_size[i]) && (abscissas_row[j] < max_abscissa); ++j)
+        {
+            Real x = abscissas_row[j];
+            K y = f(x);
+            sum += y*weight_row[j];
+            Real abterm0 = abs(y)*weight_row[j];
+            absum += abterm0;
+        }
+
+        I1 += sum*h;
+        L1_I1 += absum*h;
+        err = abs(I0 - I1);
+        if (!(boost::math::isfinite)(L1_I1))
+        {
+            return static_cast<K>(policies::raise_evaluation_error("exp_sinh_integrate", "The exp_sinh quadrature evaluated your function at a singular point and returned %1%. Please ensure your function evaluates to a finite number over its entire domain.", I1, Policy()));
+        }
+        if (err <= tolerance*L1_I1)
+        {
+            break;
+        }
+    }
+
+    if (error)
+    {
+        *error = err;
+    }
+
+    if (L1)
+    {
+        *L1 = L1_I1;
+    }
+
+    if (levels)
+    {
+        *levels = i;
+    }
+
+    return I1;
+}
+
+} // namespace detail
+} // namespace quadrature
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_ENABLE_CUDA
+
+#endif // BOOST_MATH_QUADRATURE_DETAIL_EXP_SINH_DETAIL_HPP
diff --git a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp
index a9e1ef4931..7f7477a6e6 100644
--- a/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp
+++ b/include/boost/math/quadrature/detail/sinh_sinh_detail.hpp
@@ -1,4 +1,5 @@
 // Copyright Nick Thompson, 2017
+// Copyright Matt Borland, 2024
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt @@ -7,6 +8,10 @@ #ifndef BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP #define BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP +#include + +#ifndef BOOST_MATH_HAS_NVRTC + #include #include #include @@ -15,7 +20,6 @@ #include #include #include -#include #ifdef BOOST_MATH_HAS_THREADS #include @@ -485,4 +489,865 @@ void sinh_sinh_detail::init(const std::integral_constant&) #endif }}}} -#endif + +#endif // BOOST_MATH_HAS_NVRTC + +#ifdef BOOST_MATH_ENABLE_CUDA + +#include +#include +#include +#include +#include +#include + +namespace boost { +namespace math { +namespace quadrature { +namespace detail { + +__constant__ float m_abscissas_float_1[4] = + { 3.08828742e+00f, 1.48993185e+02f, 3.41228925e+06f, 2.06932577e+18f, }; + +__constant__ float m_abscissas_float_2[4] = + { 9.13048763e-01f, 1.41578929e+01f, 6.70421552e+03f, 9.64172533e+10f, }; + +__constant__ float m_abscissas_float_3[8] = + { 4.07297690e-01f, 1.68206671e+00f, 6.15089799e+00f, 4.00396235e+01f, 7.92920025e+02f, 1.02984971e+05f, + 3.03862311e+08f, 1.56544547e+14f, }; + +__constant__ float m_abscissas_float_4[16] = + { 1.98135272e-01f, 6.40155674e-01f, 1.24892870e+00f, 2.26608084e+00f, 4.29646270e+00f, 9.13029039e+00f, + 2.31110765e+01f, 7.42770603e+01f, 3.26720921e+02f, 2.15948569e+03f, 2.41501526e+04f, 5.31819400e+05f, + 2.80058686e+07f, 4.52406508e+09f, 3.08561257e+12f, 1.33882673e+16f, }; + +__constant__ float m_abscissas_float_5[32] = + { 9.83967894e-02f, 3.00605618e-01f, 5.19857979e-01f, 7.70362083e-01f, 1.07131137e+00f, 1.45056976e+00f, + 1.95077855e+00f, 2.64003177e+00f, 3.63137237e+00f, 5.11991533e+00f, 7.45666098e+00f, 1.13022613e+01f, + 1.79641069e+01f, 3.01781070e+01f, 5.40387580e+01f, 1.04107731e+02f, 2.18029520e+02f, 5.02155699e+02f, + 1.28862131e+03f, 3.73921687e+03f, 1.24750730e+04f, 4.87639975e+04f, 2.28145658e+05f, 1.30877796e+06f, + 9.46084663e+06f, 8.88883120e+07f, 1.12416883e+09f, 1.99127673e+10f, 5.16743469e+11f, 2.06721881e+13f, + 1.35061503e+15f, 1.53854066e+17f, }; + +__constant__ float m_abscissas_float_6[65] = + { 4.91151004e-02f, 1.48013150e-01f, 2.48938814e-01f, 3.53325424e-01f, 4.62733557e-01f, 5.78912068e-01f, + 7.03870253e-01f, 8.39965859e-01f, 9.90015066e-01f, 1.15743257e+00f, 1.34641276e+00f, 1.56216711e+00f, + 1.81123885e+00f, 2.10192442e+00f, 2.44484389e+00f, 2.85372075e+00f, 3.34645891e+00f, 3.94664582e+00f, + 4.68567310e+00f, 5.60576223e+00f, 6.76433234e+00f, 8.24038318e+00f, 1.01439436e+01f, 1.26302471e+01f, + 1.59213040e+01f, 2.03392186e+01f, 2.63584645e+01f, 3.46892633e+01f, 4.64129147e+01f, 6.32055079e+01f, + 8.77149726e+01f, 1.24209693e+02f, 1.79718635e+02f, 2.66081728e+02f, 4.03727303e+02f, 6.28811307e+02f, + 1.00707984e+03f, 1.66156823e+03f, 2.82965144e+03f, 4.98438627e+03f, 9.10154693e+03f, 1.72689266e+04f, + 3.41309958e+04f, 7.04566898e+04f, 1.52340422e+05f, 3.46047978e+05f, 8.28472421e+05f, 2.09759615e+06f, + 5.63695080e+06f, 1.61407141e+07f, 4.94473068e+07f, 1.62781052e+08f, 5.78533297e+08f, 2.23083854e+09f, + 9.38239131e+09f, 4.32814954e+10f, 2.20307274e+11f, 1.24524507e+12f, 7.86900053e+12f, 5.59953143e+13f, + 4.52148695e+14f, 4.17688952e+15f, 4.45286776e+16f, 5.52914285e+17f, 8.07573252e+18f, }; + +__constant__ float m_abscissas_float_7[129] = + { 2.45471558e-02f, 7.37246687e-02f, 1.23152531e-01f, 1.73000138e-01f, 2.23440665e-01f, 2.74652655e-01f, + 3.26821679e-01f, 3.80142101e-01f, 4.34818964e-01f, 4.91070037e-01f, 5.49128046e-01f, 6.09243132e-01f, + 6.71685571e-01f, 7.36748805e-01f, 8.04752842e-01f, 8.76048080e-01f, 9.51019635e-01f, 
1.03009224e+00f, + 1.11373586e+00f, 1.20247203e+00f, 1.29688123e+00f, 1.39761124e+00f, 1.50538689e+00f, 1.62102121e+00f, + 1.74542840e+00f, 1.87963895e+00f, 2.02481711e+00f, 2.18228138e+00f, 2.35352849e+00f, 2.54026147e+00f, + 2.74442267e+00f, 2.96823279e+00f, 3.21423687e+00f, 3.48535896e+00f, 3.78496698e+00f, 4.11695014e+00f, + 4.48581137e+00f, 4.89677825e+00f, 5.35593629e+00f, 5.87038976e+00f, 6.44845619e+00f, 7.09990245e+00f, + 7.83623225e+00f, 8.67103729e+00f, 9.62042778e+00f, 1.07035620e+01f, 1.19433001e+01f, 1.33670142e+01f, + 1.50075962e+01f, 1.69047155e+01f, 1.91063967e+01f, 2.16710044e+01f, 2.46697527e+01f, 2.81898903e+01f, + 3.23387613e+01f, 3.72490076e+01f, 4.30852608e+01f, 5.00527965e+01f, 5.84087761e+01f, 6.84769282e+01f, + 8.06668178e+01f, 9.54992727e+01f, 1.13640120e+02f, 1.35945194e+02f, 1.63520745e+02f, 1.97804969e+02f, + 2.40678754e+02f, 2.94617029e+02f, 3.62896953e+02f, 4.49886178e+02f, 5.61444735e+02f, 7.05489247e+02f, + 8.92790773e+02f, 1.13811142e+03f, 1.46183599e+03f, 1.89233262e+03f, 2.46939604e+03f, 3.24931157e+03f, + 4.31236711e+03f, 5.77409475e+03f, 7.80224724e+03f, 1.06426753e+04f, 1.46591538e+04f, 2.03952854e+04f, + 2.86717062e+04f, 4.07403376e+04f, 5.85318231e+04f, 8.50568927e+04f, 1.25064927e+05f, 1.86137394e+05f, + 2.80525578e+05f, 4.28278249e+05f, 6.62634051e+05f, 1.03944324e+06f, 1.65385743e+06f, 2.67031565e+06f, + 4.37721203e+06f, 7.28807171e+06f, 1.23317299e+07f, 2.12155729e+07f, 3.71308625e+07f, 6.61457938e+07f, + 1.20005529e+08f, 2.21862941e+08f, 4.18228294e+08f, 8.04370413e+08f, 1.57939299e+09f, 3.16812242e+09f, + 6.49660681e+09f, 1.36285199e+10f, 2.92686390e+10f, 6.43979867e+10f, 1.45275523e+11f, 3.36285446e+11f, + 7.99420279e+11f, 1.95326423e+12f, 4.90958187e+12f, 1.27062273e+13f, 3.38907099e+13f, 9.32508403e+13f, + 2.64948942e+14f, 7.78129518e+14f, 2.36471505e+15f, 7.44413803e+15f, 2.43021724e+16f, 8.23706864e+16f, + 2.90211705e+17f, 1.06415768e+18f, 4.06627711e+18f, }; + +__constant__ float m_abscissas_float_8[259] = + { 1.22722792e-02f, 3.68272289e-02f, 6.14133763e-02f, 8.60515971e-02f, 1.10762884e-01f, 1.35568393e-01f, + 1.60489494e-01f, 1.85547813e-01f, 2.10765290e-01f, 2.36164222e-01f, 2.61767321e-01f, 2.87597761e-01f, + 3.13679240e-01f, 3.40036029e-01f, 3.66693040e-01f, 3.93675878e-01f, 4.21010910e-01f, 4.48725333e-01f, + 4.76847237e-01f, 5.05405685e-01f, 5.34430786e-01f, 5.63953775e-01f, 5.94007101e-01f, 6.24624511e-01f, + 6.55841151e-01f, 6.87693662e-01f, 7.20220285e-01f, 7.53460977e-01f, 7.87457528e-01f, 8.22253686e-01f, + 8.57895297e-01f, 8.94430441e-01f, 9.31909591e-01f, 9.70385775e-01f, 1.00991475e+00f, 1.05055518e+00f, + 1.09236885e+00f, 1.13542087e+00f, 1.17977990e+00f, 1.22551840e+00f, 1.27271289e+00f, 1.32144424e+00f, + 1.37179794e+00f, 1.42386447e+00f, 1.47773961e+00f, 1.53352485e+00f, 1.59132774e+00f, 1.65126241e+00f, + 1.71344993e+00f, 1.77801893e+00f, 1.84510605e+00f, 1.91485658e+00f, 1.98742510e+00f, 2.06297613e+00f, + 2.14168493e+00f, 2.22373826e+00f, 2.30933526e+00f, 2.39868843e+00f, 2.49202464e+00f, 2.58958621e+00f, + 2.69163219e+00f, 2.79843963e+00f, 2.91030501e+00f, 3.02754584e+00f, 3.15050230e+00f, 3.27953915e+00f, + 3.41504770e+00f, 3.55744805e+00f, 3.70719145e+00f, 3.86476298e+00f, 4.03068439e+00f, 4.20551725e+00f, + 4.38986641e+00f, 4.58438376e+00f, 4.78977239e+00f, 5.00679110e+00f, 5.23625945e+00f, 5.47906320e+00f, + 5.73616037e+00f, 6.00858792e+00f, 6.29746901e+00f, 6.60402117e+00f, 6.92956515e+00f, 7.27553483e+00f, + 7.64348809e+00f, 8.03511888e+00f, 8.45227058e+00f, 8.89695079e+00f, 9.37134780e+00f, 
9.87784877e+00f, + 1.04190601e+01f, 1.09978298e+01f, 1.16172728e+01f, 1.22807990e+01f, 1.29921443e+01f, 1.37554055e+01f, + 1.45750793e+01f, 1.54561061e+01f, 1.64039187e+01f, 1.74244972e+01f, 1.85244301e+01f, 1.97109839e+01f, + 2.09921804e+01f, 2.23768845e+01f, 2.38749023e+01f, 2.54970927e+01f, 2.72554930e+01f, 2.91634608e+01f, + 3.12358351e+01f, 3.34891185e+01f, 3.59416839e+01f, 3.86140099e+01f, 4.15289481e+01f, 4.47120276e+01f, + 4.81918020e+01f, 5.20002465e+01f, 5.61732106e+01f, 6.07509371e+01f, 6.57786566e+01f, 7.13072704e+01f, + 7.73941341e+01f, 8.41039609e+01f, 9.15098607e+01f, 9.96945411e+01f, 1.08751694e+02f, 1.18787600e+02f, + 1.29922990e+02f, 1.42295202e+02f, 1.56060691e+02f, 1.71397955e+02f, 1.88510933e+02f, 2.07632988e+02f, + 2.29031559e+02f, 2.53013612e+02f, 2.79932028e+02f, 3.10193130e+02f, 3.44265522e+02f, 3.82690530e+02f, + 4.26094527e+02f, 4.75203518e+02f, 5.30860437e+02f, 5.94045681e+02f, 6.65901543e+02f, 7.47761337e+02f, + 8.41184173e+02f, 9.47996570e+02f, 1.07034233e+03f, 1.21074246e+03f, 1.37216724e+03f, 1.55812321e+03f, + 1.77275819e+03f, 2.02098849e+03f, 2.30865326e+03f, 2.64270219e+03f, 3.03142418e+03f, 3.48472668e+03f, + 4.01447750e+03f, 4.63492426e+03f, 5.36320995e+03f, 6.22000841e+03f, 7.23030933e+03f, 8.42439022e+03f, + 9.83902287e+03f, 1.15189746e+04f, 1.35188810e+04f, 1.59055875e+04f, 1.87610857e+04f, 2.21862046e+04f, + 2.63052621e+04f, 3.12719440e+04f, 3.72767546e+04f, 4.45564828e+04f, 5.34062659e+04f, 6.41950058e+04f, + 7.73851264e+04f, 9.35579699e+04f, 1.13446538e+05f, 1.37977827e+05f, 1.68327749e+05f, 2.05992575e+05f, + 2.52882202e+05f, 3.11442272e+05f, 3.84814591e+05f, 4.77048586e+05f, 5.93380932e+05f, 7.40606619e+05f, + 9.27573047e+05f, 1.16584026e+06f, 1.47056632e+06f, 1.86169890e+06f, 2.36558487e+06f, 3.01715270e+06f, + 3.86288257e+06f, 4.96486431e+06f, 6.40636283e+06f, 8.29948185e+06f, 1.07957589e+07f, 1.41008733e+07f, + 1.84951472e+07f, 2.43622442e+07f, 3.22295113e+07f, 4.28249388e+07f, 5.71579339e+07f, 7.66343793e+07f, + 1.03221273e+08f, 1.39683399e+08f, 1.89925150e+08f, 2.59486540e+08f, 3.56266474e+08f, 4.91582541e+08f, + 6.81731647e+08f, 9.50299811e+08f, 1.33159830e+09f, 1.87580198e+09f, 2.65667391e+09f, 3.78324022e+09f, + 5.41753185e+09f, 7.80169537e+09f, 1.12996537e+10f, 1.64614916e+10f, 2.41235400e+10f, 3.55648690e+10f, + 5.27534501e+10f, 7.87357211e+10f, 1.18256902e+11f, 1.78754944e+11f, 2.71963306e+11f, 4.16512215e+11f, + 6.42178186e+11f, 9.96872550e+11f, 1.55821233e+12f, 2.45280998e+12f, 3.88865623e+12f, 6.20986899e+12f, + 9.98992422e+12f, 1.61915800e+13f, 2.64432452e+13f, 4.35201885e+13f, 7.21888469e+13f, 1.20699764e+14f, + 2.03448372e+14f, 3.45755310e+14f, 5.92524851e+14f, 1.02405779e+15f, 1.78517405e+15f, 3.13930699e+15f, + 5.56985627e+15f, 9.97176335e+15f, 1.80168749e+16f, 3.28570986e+16f, 6.04901854e+16f, 1.12437528e+17f, + 2.11044513e+17f, 4.00073701e+17f, 7.66084936e+17f, 1.48201877e+18f, 2.89694543e+18f, 5.72279017e+18f, + 1.14268996e+19f, }; + +__constant__ float* m_abscissas_float[8] = { + m_abscissas_float_1, + m_abscissas_float_2, + m_abscissas_float_3, + m_abscissas_float_4, + m_abscissas_float_5, + m_abscissas_float_6, + m_abscissas_float_7, + m_abscissas_float_8, +}; + +__constant__ float m_weights_float_1[4] = + { 7.86824160e+00f, 8.80516388e+02f, 5.39627832e+07f, 8.87651190e+19f, }; + +__constant__ float m_weights_float_2[4] = + { 2.39852428e+00f, 5.24459642e+01f, 6.45788782e+04f, 2.50998524e+12f, }; + +__constant__ float m_weights_float_3[8] = + { 1.74936958e+00f, 3.97965898e+00f, 1.84851460e+01f, 1.86488072e+02f, 
5.97420570e+03f, 1.27041264e+06f, + 6.16419301e+09f, 5.23085003e+15f, }; + +__constant__ float m_weights_float_4[16] = + { 1.61385906e+00f, 1.99776729e+00f, 3.02023198e+00f, 5.47764184e+00f, 1.17966092e+01f, 3.03550485e+01f, + 9.58442179e+01f, 3.89387024e+02f, 2.17919325e+03f, 1.83920812e+04f, 2.63212061e+05f, 7.42729651e+06f, + 5.01587565e+08f, 1.03961087e+11f, 9.10032891e+13f, 5.06865116e+17f, }; + +__constant__ float m_weights_float_5[32] = + { 1.58146596e+00f, 1.66914991e+00f, 1.85752319e+00f, 2.17566262e+00f, 2.67590138e+00f, 3.44773868e+00f, + 4.64394654e+00f, 6.53020450e+00f, 9.58228502e+00f, 1.46836141e+01f, 2.35444955e+01f, 3.96352727e+01f, + 7.03763521e+01f, 1.32588012e+02f, 2.66962565e+02f, 5.79374920e+02f, 1.36869193e+03f, 3.55943572e+03f, + 1.03218668e+04f, 3.38662130e+04f, 1.27816626e+05f, 5.65408251e+05f, 2.99446204e+06f, 1.94497502e+07f, + 1.59219301e+08f, 1.69428882e+09f, 2.42715618e+10f, 4.87031785e+11f, 1.43181966e+13f, 6.48947152e+14f, + 4.80375775e+16f, 6.20009636e+18f, }; + +__constant__ float m_weights_float_6[65] = + { 1.57345777e+00f, 1.59489276e+00f, 1.63853652e+00f, 1.70598041e+00f, 1.79972439e+00f, 1.92332285e+00f, + 2.08159737e+00f, 2.28093488e+00f, 2.52969785e+00f, 2.83878478e+00f, 3.22239575e+00f, 3.69908136e+00f, + 4.29318827e+00f, 5.03686536e+00f, 5.97287114e+00f, 7.15853842e+00f, 8.67142780e+00f, 1.06174736e+01f, + 1.31428500e+01f, 1.64514563e+01f, 2.08309945e+01f, 2.66923599e+01f, 3.46299351e+01f, 4.55151836e+01f, + 6.06440809e+01f, 8.19729692e+01f, 1.12502047e+02f, 1.56909655e+02f, 2.22620435e+02f, 3.21638549e+02f, + 4.73757451e+02f, 7.12299455e+02f, 1.09460965e+03f, 1.72169779e+03f, 2.77592491e+03f, 4.59523007e+03f, + 7.82342759e+03f, 1.37235744e+04f, 2.48518896e+04f, 4.65553875e+04f, 9.04176678e+04f, 1.82484396e+05f, + 3.83680026e+05f, 8.42627197e+05f, 1.93843257e+06f, 4.68511285e+06f, 1.19352867e+07f, 3.21564375e+07f, + 9.19600893e+07f, 2.80222318e+08f, 9.13611083e+08f, 3.20091090e+09f, 1.21076526e+10f, 4.96902475e+10f, + 2.22431575e+11f, 1.09212534e+12f, 5.91688298e+12f, 3.55974344e+13f, 2.39435365e+14f, 1.81355107e+15f, + 1.55873671e+16f, 1.53271488e+17f, 1.73927478e+18f, 2.29884122e+19f, 3.57403070e+20f, }; + +__constant__ float m_weights_float_7[129] = + { 1.57146132e+00f, 1.57679017e+00f, 1.58749564e+00f, 1.60367396e+00f, 1.62547113e+00f, 1.65308501e+00f, + 1.68676814e+00f, 1.72683132e+00f, 1.77364814e+00f, 1.82766042e+00f, 1.88938482e+00f, 1.95942057e+00f, + 2.03845873e+00f, 2.12729290e+00f, 2.22683194e+00f, 2.33811466e+00f, 2.46232715e+00f, 2.60082286e+00f, + 2.75514621e+00f, 2.92706011e+00f, 3.11857817e+00f, 3.33200254e+00f, 3.56996830e+00f, 3.83549565e+00f, + 4.13205150e+00f, 4.46362211e+00f, 4.83479919e+00f, 5.25088196e+00f, 5.71799849e+00f, 6.24325042e+00f, + 6.83488580e+00f, 7.50250620e+00f, 8.25731548e+00f, 9.11241941e+00f, 1.00831875e+01f, 1.11876913e+01f, + 1.24472371e+01f, 1.38870139e+01f, 1.55368872e+01f, 1.74323700e+01f, 1.96158189e+01f, 2.21379089e+01f, + 2.50594593e+01f, 2.84537038e+01f, 3.24091185e+01f, 3.70329629e+01f, 4.24557264e+01f, 4.88367348e+01f, + 5.63712464e+01f, 6.52994709e+01f, 7.59180776e+01f, 8.85949425e+01f, 1.03788130e+02f, 1.22070426e+02f, + 1.44161210e+02f, 1.70968019e+02f, 2.03641059e+02f, 2.43645006e+02f, 2.92854081e+02f, 3.53678602e+02f, + 4.29234308e+02f, 5.23570184e+02f, 6.41976690e+02f, 7.91405208e+02f, 9.81042209e+02f, 1.22309999e+03f, + 1.53391256e+03f, 1.93546401e+03f, 2.45753455e+03f, 3.14073373e+03f, 4.04081819e+03f, 5.23488160e+03f, + 6.83029446e+03f, 8.97771323e+03f, 1.18901592e+04f, 1.58712239e+04f, 
2.13571111e+04f, 2.89798371e+04f, + 3.96630673e+04f, 5.47687519e+04f, 7.63235654e+04f, 1.07371915e+05f, 1.52531667e+05f, 2.18877843e+05f, + 3.17362450e+05f, 4.65120153e+05f, 6.89253766e+05f, 1.03311989e+06f, 1.56688798e+06f, 2.40549203e+06f, + 3.73952896e+06f, 5.88912115e+06f, 9.39904635e+06f, 1.52090328e+07f, 2.49628719e+07f, 4.15775926e+07f, + 7.03070537e+07f, 1.20759856e+08f, 2.10788251e+08f, 3.74104720e+08f, 6.75449459e+08f, 1.24131674e+09f, + 2.32331003e+09f, 4.43117602e+09f, 8.61744649e+09f, 1.70983691e+10f, 3.46357452e+10f, 7.16760712e+10f, + 1.51634762e+11f, 3.28172932e+11f, 7.27110260e+11f, 1.65049955e+12f, 3.84133815e+12f, 9.17374427e+12f, + 2.24990195e+13f, 5.67153509e+13f, 1.47074225e+14f, 3.92701252e+14f, 1.08063998e+15f, 3.06767147e+15f, + 8.99238679e+15f, 2.72472254e+16f, 8.54294612e+16f, 2.77461372e+17f, 9.34529948e+17f, 3.26799612e+18f, + 1.18791443e+19f, 4.49405341e+19f, 1.77170665e+20f, }; + +__constant__ float m_weights_float_8[259] = + { 1.57096255e+00f, 1.57229290e+00f, 1.57495658e+00f, 1.57895955e+00f, 1.58431079e+00f, 1.59102230e+00f, + 1.59910918e+00f, 1.60858966e+00f, 1.61948515e+00f, 1.63182037e+00f, 1.64562338e+00f, 1.66092569e+00f, + 1.67776241e+00f, 1.69617233e+00f, 1.71619809e+00f, 1.73788633e+00f, 1.76128784e+00f, 1.78645779e+00f, + 1.81345587e+00f, 1.84234658e+00f, 1.87319943e+00f, 1.90608922e+00f, 1.94109632e+00f, 1.97830698e+00f, + 2.01781368e+00f, 2.05971547e+00f, 2.10411838e+00f, 2.15113585e+00f, 2.20088916e+00f, 2.25350798e+00f, + 2.30913084e+00f, 2.36790578e+00f, 2.42999091e+00f, 2.49555516e+00f, 2.56477893e+00f, 2.63785496e+00f, + 2.71498915e+00f, 2.79640147e+00f, 2.88232702e+00f, 2.97301705e+00f, 3.06874019e+00f, 3.16978367e+00f, + 3.27645477e+00f, 3.38908227e+00f, 3.50801806e+00f, 3.63363896e+00f, 3.76634859e+00f, 3.90657947e+00f, + 4.05479525e+00f, 4.21149322e+00f, 4.37720695e+00f, 4.55250922e+00f, 4.73801517e+00f, 4.93438579e+00f, + 5.14233166e+00f, 5.36261713e+00f, 5.59606472e+00f, 5.84356014e+00f, 6.10605759e+00f, 6.38458564e+00f, + 6.68025373e+00f, 6.99425915e+00f, 7.32789480e+00f, 7.68255767e+00f, 8.05975815e+00f, 8.46113023e+00f, + 8.88844279e+00f, 9.34361190e+00f, 9.82871448e+00f, 1.03460033e+01f, 1.08979234e+01f, 1.14871305e+01f, + 1.21165112e+01f, 1.27892047e+01f, 1.35086281e+01f, 1.42785033e+01f, 1.51028871e+01f, 1.59862046e+01f, + 1.69332867e+01f, 1.79494108e+01f, 1.90403465e+01f, 2.02124072e+01f, 2.14725057e+01f, 2.28282181e+01f, + 2.42878539e+01f, 2.58605342e+01f, 2.75562800e+01f, 2.93861096e+01f, 3.13621485e+01f, 3.34977526e+01f, + 3.58076454e+01f, 3.83080730e+01f, 4.10169773e+01f, 4.39541917e+01f, 4.71416602e+01f, 5.06036855e+01f, + 5.43672075e+01f, 5.84621188e+01f, 6.29216205e+01f, 6.77826252e+01f, 7.30862125e+01f, 7.88781469e+01f, + 8.52094636e+01f, 9.21371360e+01f, 9.97248336e+01f, 1.08043785e+02f, 1.17173764e+02f, 1.27204209e+02f, + 1.38235512e+02f, 1.50380485e+02f, 1.63766039e+02f, 1.78535118e+02f, 1.94848913e+02f, 2.12889407e+02f, + 2.32862309e+02f, 2.55000432e+02f, 2.79567594e+02f, 3.06863126e+02f, 3.37227087e+02f, 3.71046310e+02f, + 4.08761417e+02f, 4.50874968e+02f, 4.97960949e+02f, 5.50675821e+02f, 6.09771424e+02f, 6.76110054e+02f, + 7.50682104e+02f, 8.34626760e+02f, 9.29256285e+02f, 1.03608458e+03f, 1.15686082e+03f, 1.29360914e+03f, + 1.44867552e+03f, 1.62478326e+03f, 1.82509876e+03f, 2.05330964e+03f, 2.31371761e+03f, 2.61134924e+03f, + 2.95208799e+03f, 3.34283233e+03f, 3.79168493e+03f, 4.30817984e+03f, 4.90355562e+03f, 5.59108434e+03f, + 6.38646863e+03f, 7.30832183e+03f, 8.37874981e+03f, 9.62405722e+03f, 1.10756067e+04f, 
1.27708661e+04f, + 1.47546879e+04f, 1.70808754e+04f, 1.98141031e+04f, 2.30322789e+04f, 2.68294532e+04f, 3.13194118e+04f, + 3.66401221e+04f, 4.29592484e+04f, 5.04810088e+04f, 5.94547213e+04f, 7.01854788e+04f, 8.30475173e+04f, + 9.85009981e+04f, 1.17113127e+05f, 1.39584798e+05f, 1.66784302e+05f, 1.99790063e+05f, 2.39944995e+05f, + 2.88925794e+05f, 3.48831531e+05f, 4.22297220e+05f, 5.12639825e+05f, 6.24046488e+05f, 7.61817907e+05f, + 9.32683930e+05f, 1.14521401e+06f, 1.41035265e+06f, 1.74212004e+06f, 2.15853172e+06f, 2.68280941e+06f, + 3.34498056e+06f, 4.18399797e+06f, 5.25055801e+06f, 6.61086017e+06f, 8.35163942e+06f, 1.05869253e+07f, + 1.34671524e+07f, 1.71914827e+07f, 2.20245345e+07f, 2.83191730e+07f, 3.65476782e+07f, 4.73445266e+07f, + 6.15653406e+07f, 8.03684303e+07f, 1.05328028e+08f, 1.38592169e+08f, 1.83103699e+08f, 2.42910946e+08f, + 3.23606239e+08f, 4.32947522e+08f, 5.81743297e+08f, 7.85117979e+08f, 1.06432920e+09f, 1.44938958e+09f, + 1.98286647e+09f, 2.72541431e+09f, 3.76386796e+09f, 5.22313881e+09f, 7.28378581e+09f, 1.02080964e+10f, + 1.43789932e+10f, 2.03583681e+10f, 2.89749983e+10f, 4.14577375e+10f, 5.96383768e+10f, 8.62622848e+10f, + 1.25466705e+11f, 1.83521298e+11f, 2.69981221e+11f, 3.99492845e+11f, 5.94638056e+11f, 8.90440997e+11f, + 1.34155194e+12f, 2.03376855e+12f, 3.10262796e+12f, 4.76359832e+12f, 7.36142036e+12f, 1.14512696e+13f, + 1.79331419e+13f, 2.82758550e+13f, 4.48929705e+13f, 7.17780287e+13f, 1.15585510e+14f, 1.87483389e+14f, + 3.06351036e+14f, 5.04340065e+14f, 8.36616340e+14f, 1.39855635e+15f, 2.35633575e+15f, 4.00176517e+15f, + 6.85137513e+15f, 1.18269011e+16f, 2.05867353e+16f, 3.61396878e+16f, 6.39911218e+16f, 1.14301619e+17f, + 2.05988138e+17f, 3.74584679e+17f, 6.87444303e+17f, 1.27340764e+18f, 2.38124192e+18f, 4.49583562e+18f, + 8.57144202e+18f, 1.65044358e+19f, 3.21010035e+19f, 6.30778012e+19f, 1.25240403e+20f, 2.51300530e+20f, + 5.09677626e+20f, }; + +__constant__ float* m_weights_float[8] = { + m_weights_float_1, + m_weights_float_2, + m_weights_float_3, + m_weights_float_4, + m_weights_float_5, + m_weights_float_6, + m_weights_float_7, + m_weights_float_8 +}; + +__constant__ double m_abscissas_double_1[6] = + { 3.088287417976322866e+00, 1.489931846492091580e+02, 3.412289247883437102e+06, 2.069325766042617791e+18, + 2.087002407609475560e+50, 2.019766160717908151e+137, }; + +__constant__ double m_abscissas_double_2[6] = + { 9.130487626376696748e-01, 1.415789294662811592e+01, 6.704215516223276482e+03, 9.641725327150499415e+10, + 2.508950760085778485e+30, 1.447263535710337145e+83, }; + +__constant__ double m_abscissas_double_3[12] = + { 4.072976900657586902e-01, 1.682066707021148743e+00, 6.150897986386729515e+00, 4.003962351929400222e+01, + 7.929200247931026321e+02, 1.029849713330979583e+05, 3.038623109252438574e+08, 1.565445474362494869e+14, + 4.042465098430219104e+23, 1.321706827429658179e+39, 4.991231782099557998e+64, 7.352943850359875966e+106, }; + +__constant__ double m_abscissas_double_4[24] = + { 1.981352722514781726e-01, 6.401556735005260177e-01, 1.248928698253977663e+00, 2.266080840944321232e+00, + 4.296462696702327381e+00, 9.130290387099955696e+00, 2.311107653864279933e+01, 7.427706034324012430e+01, + 3.267209207115258917e+02, 2.159485694311818716e+03, 2.415015262896413060e+04, 5.318194002756929158e+05, + 2.800586857217043323e+07, 4.524065079794338780e+09, 3.085612573980677122e+12, 1.338826733015807478e+16, + 6.254617176562341381e+20, 6.182098535814164754e+26, 3.077293649788458067e+34, 2.348957289370104303e+44, + 1.148543197899469758e+57, 
2.255300070010069868e+73, 1.877919500569195394e+94, 1.367473887938624280e+121, }; + +__constant__ double m_abscissas_double_5[49] = + { 9.839678940067320339e-02, 3.006056176599550351e-01, 5.198579789949384900e-01, 7.703620832988877009e-01, + 1.071311369641311830e+00, 1.450569758088998445e+00, 1.950778549520360334e+00, 2.640031773695551468e+00, + 3.631372373667412273e+00, 5.119915330903350570e+00, 7.456660981404883289e+00, 1.130226126889972624e+01, + 1.796410692472772550e+01, 3.017810704601898222e+01, 5.403875800312370567e+01, 1.041077314477469548e+02, + 2.180295201202628077e+02, 5.021556986259101646e+02, 1.288621310998222420e+03, 3.739216870800548324e+03, + 1.247507297020191232e+04, 4.876399753226692124e+04, 2.281456582219130122e+05, 1.308777960064843017e+06, + 9.460846634209664077e+06, 8.888831203637279622e+07, 1.124168828974344134e+09, 1.991276729532144470e+10, + 5.167434691060984650e+11, 2.067218814203990888e+13, 1.350615033184100406e+15, 1.538540662836508188e+17, + 3.290747290540350661e+19, 1.437291381884498816e+22, 1.409832445530347286e+25, 3.459135480277971441e+28, + 2.398720582340954092e+32, 5.398806604617292960e+36, 4.613340002580628610e+41, 1.787685909667902457e+47, + 3.841984370124338536e+53, 5.752797955708583700e+60, 7.771812038427286551e+68, 1.269673044204081626e+78, + 3.495676773765731568e+88, 2.362519474971692445e+100, 6.002143893273651123e+113, 9.290716303464155539e+128, + 1.514442238033847090e+146, }; + +__constant__ double m_abscissas_double_6[98] = + { 4.911510035029024930e-02, 1.480131496743607333e-01, 2.489388137406836857e-01, 3.533254236926684378e-01, + 4.627335566122353259e-01, 5.789120681640963067e-01, 7.038702533860627799e-01, 8.399658591446505688e-01, + 9.900150664244376147e-01, 1.157432570143699131e+00, 1.346412759185361763e+00, 1.562167113901335551e+00, + 1.811238852782323380e+00, 2.101924419006550301e+00, 2.444843885584197934e+00, 2.853720746632915024e+00, + 3.346458910955350787e+00, 3.946645821057838387e+00, 4.685673101596678529e+00, 5.605762230908151175e+00, + 6.764332336830574204e+00, 8.240383175379985221e+00, 1.014394356129857730e+01, 1.263024714338892472e+01, + 1.592130395780345258e+01, 2.033921861921857185e+01, 2.635846445760633752e+01, 3.468926333224152409e+01, + 4.641291467019728963e+01, 6.320550793890424203e+01, 8.771497261808906374e+01, 1.242096926240411498e+02, + 1.797186347845127557e+02, 2.660817283327900190e+02, 4.037273029575712841e+02, 6.288113066545908703e+02, + 1.007079837507490594e+03, 1.661568229185114288e+03, 2.829651440786582598e+03, 4.984386266585669139e+03, + 9.101546927647810893e+03, 1.726892655475049727e+04, 3.413099578778601190e+04, 7.045668977053092802e+04, + 1.523404217761279128e+05, 3.460479782897947414e+05, 8.284724209233183002e+05, 2.097596146601193946e+06, + 5.636950798861273236e+06, 1.614071410855607245e+07, 4.944730678915060360e+07, 1.627810516820991356e+08, + 5.785332971632280838e+08, 2.230838540681955690e+09, 9.382391306064739643e+09, 4.328149544776551692e+10, + 2.203072744049242904e+11, 1.245245067109136413e+12, 7.869000534957822375e+12, 5.599531432979422461e+13, + 4.521486949902090877e+14, 4.176889516548293265e+15, 4.452867759650496656e+16, 5.529142853140498068e+17, + 8.075732516562854275e+18, 1.402046916260468698e+20, 2.925791412832239850e+21, 7.426433029335410886e+22, + 2.321996331245735364e+24, 9.064194250638442432e+25, 4.481279048819445609e+27, 2.849046304726990645e+29, + 2.367381159183355975e+31, 2.615825578455121227e+33, 3.914764948263290808e+35, 8.092042448555929219e+37, + 2.358921320940630332e+40, 
9.915218648535332591e+42, 6.152851059342658764e+45, 5.780276340144515388e+48, + 8.443751734186488626e+51, 1.973343350899766708e+55, 7.605247378556219980e+58, 4.992057104939510418e+62, + 5.775863423903912316e+66, 1.221808201945355603e+71, 4.912917230387133816e+75, 3.913971813732202372e+80, + 6.456388069905286787e+85, 2.311225068528010358e+91, 1.887458157719431339e+97, 3.708483165438453094e+103, + 1.855198812283538635e+110, 2.509787873171705318e+117, 9.790423755591216617e+124, 1.179088807944050747e+133, + 4.714631846722476620e+141, 6.762657785959713240e+150, }; + +__constant__ double m_abscissas_double_7[196] = + { 2.454715583629863651e-02, 7.372466873903346224e-02, 1.231525309416766543e-01, 1.730001377719248556e-01, + 2.234406649596860001e-01, 2.746526549718518258e-01, 3.268216792980646669e-01, 3.801421009804789245e-01, + 4.348189637215614948e-01, 4.910700365099428407e-01, 5.491280459480215441e-01, 6.092431324382654397e-01, + 6.716855712021148069e-01, 7.367488049067938643e-01, 8.047528416336950644e-01, 8.760480802482050705e-01, + 9.510196351823332253e-01, 1.030092244532470067e+00, 1.113735859588680765e+00, 1.202472030918058876e+00, + 1.296881226496863751e+00, 1.397611241828373026e+00, 1.505386891360545205e+00, 1.621021205894798030e+00, + 1.745428403369044572e+00, 1.879638952031029331e+00, 2.024817107609328524e+00, 2.182281382147884181e+00, + 2.353528494823881355e+00, 2.540261468229626457e+00, 2.744422672171478111e+00, 2.968232787190606619e+00, + 3.214236869520657666e+00, 3.485358957907730467e+00, 3.784966983117372821e+00, 4.116950138940295100e+00, + 4.485811369388231710e+00, 4.896778246562001812e+00, 5.355936290826725948e+00, 5.870389762600956907e+00, + 6.448456189131117605e+00, 7.099902452679558236e+00, 7.836232253282841261e+00, 8.671037293575230635e+00, + 9.620427777985990363e+00, 1.070356198876799531e+01, 1.194330008139441022e+01, 1.336701421038499647e+01, + 1.500759615914396343e+01, 1.690471548203528376e+01, 1.910639668731689597e+01, 2.167100443216577994e+01, + 2.466975274695099197e+01, 2.818989025157845355e+01, 3.233876132429401745e+01, 3.724900758097245740e+01, + 4.308526084907741997e+01, 5.005279647654703975e+01, 5.840877607253876528e+01, 6.847692821534239862e+01, + 8.066681777060714848e+01, 9.549927270200249260e+01, 1.136401195769487885e+02, 1.359451944976603209e+02, + 1.635207451879744447e+02, 1.978049687912586950e+02, 2.406787535889776661e+02, 2.946170292930555023e+02, + 3.628969532147125333e+02, 4.498861782715596902e+02, 5.614447353133496106e+02, 7.054892470899271429e+02, + 8.927907732799964116e+02, 1.138111424979478376e+03, 1.461835991563605367e+03, 1.892332623444716186e+03, + 2.469396036186133479e+03, 3.249311569298824731e+03, 4.312367113170283012e+03, 5.774094754500139661e+03, + 7.802247237500851845e+03, 1.064267530975806972e+04, 1.465915383535674990e+04, 2.039528541239754835e+04, + 2.867170622421556265e+04, 4.074033762183453297e+04, 5.853182310596923393e+04, 8.505689265265206640e+04, + 1.250649269847856615e+05, 1.861373943166749766e+05, 2.805255777452010927e+05, 4.282782486084761748e+05, + 6.626340506127657304e+05, 1.039443239650339565e+06, 1.653857426112961316e+06, 2.670315650125279161e+06, + 4.377212026624358795e+06, 7.288071713698413821e+06, 1.233172993400331694e+07, 2.121557285769933699e+07, + 3.713086254861535383e+07, 6.614579377352135534e+07, 1.200055291694917110e+08, 2.218629410296880690e+08, + 4.182282939928687703e+08, 8.043704132493714804e+08, 1.579392989425668114e+09, 3.168122415524104635e+09, + 6.496606811549861323e+09, 1.362851988356444486e+10, 
2.926863897008707708e+10, 6.439798665209493735e+10, + 1.452755233772903022e+11, 3.362854459389246576e+11, 7.994202785433479271e+11, 1.953264233362291960e+12, + 4.909581868242554569e+12, 1.270622730765015610e+13, 3.389070986742985764e+13, 9.325084030208844833e+13, + 2.649489423834534140e+14, 7.781295184094957195e+14, 2.364715052527355639e+15, 7.444138031465958255e+15, + 2.430217240684749635e+16, 8.237068641534357762e+16, 2.902117050664548840e+17, 1.064157679404037013e+18, + 4.066277106061960017e+18, 1.621274233630359097e+19, 6.754156830915450013e+19, 2.944056841733781919e+20, + 1.344640139549107817e+21, 6.444586158944723300e+21, 3.246218667554608934e+22, 1.721234579556653533e+23, + 9.622533890240474391e+23, 5.681407260417956671e+24, 3.548890779995928184e+25, 2.349506425672269562e+26, + 1.651618130605205643e+27, 1.235147426493113059e+28, 9.845947239792057550e+28, 8.383130781984610418e+29, + 7.639649461399172445e+30, 7.467862732233885201e+31, 7.847691482004993660e+32, 8.886032557626454704e+33, + 1.086734890678302436e+35, 1.438967777036538458e+36, 2.068168865475603521e+37, 3.234885320223912385e+38, + 5.521233641542628514e+39, 1.031148231194663855e+41, 2.113272035816365982e+42, 4.766724345485077520e+43, + 1.186961550990218287e+45, 3.273172169205847573e+46, 1.002821226769167753e+48, 3.424933903935156479e+49, + 1.308436017026428736e+51, 5.611378330048420503e+52, 2.711424806327139291e+54, 1.481771793644066442e+56, + 9.194282071042778804e+57, 6.503661455875355562e+59, 5.266329986868627303e+61, 4.902662807969347359e+63, + 5.270511057289557050e+65, 6.572856511670583316e+67, 9.553956030013225387e+69, 1.626491911159411616e+72, + 3.259410915500951223e+74, 7.728460318113614280e+76, 2.179881996905918059e+79, 7.354484388371505915e+81, + 2.984831270803957746e+84, 1.465828267813438962e+87, 8.763355972629864261e+89, 6.417909665847831130e+92, + 5.794958649229893510e+95, 6.494224472311908365e+98, 9.095000156016433698e+101, 1.603058498455299102e+105, + 3.582099119119320529e+108, 1.022441227139854687e+112, 3.756872185015086057e+115, 1.791363463832849159e+119, + 1.117641882039472124e+123, 9.202159565546528285e+126, 1.008716474827888568e+131, 1.485546487089301805e+135, + 2.966961534830566097e+139, 8.114207284664369360e+143, 3.069178087507669739e+148, 1.622223681147791473e+153, }; + +__constant__ double m_abscissas_double_8[391] = + { 1.227227917054637830e-02, 3.682722894492590471e-02, 6.141337626871079991e-02, 8.605159708778207907e-02, + 1.107628840017845446e-01, 1.355683934957785482e-01, 1.604894937454335489e-01, 1.855478131645089496e-01, + 2.107652898670700524e-01, 2.361642222214626268e-01, 2.617673206785495261e-01, 2.875977610631342900e-01, + 3.136792395249035647e-01, 3.400360293536632770e-01, 3.666930398731810193e-01, 3.936758776386451797e-01, + 4.210109101746846268e-01, 4.487253325041450341e-01, 4.768472367324829462e-01, 5.054056849688209375e-01, + 5.344307858825229079e-01, 5.639537752137267134e-01, 5.940071005777549000e-01, 6.246245109268716053e-01, + 6.558411510586397969e-01, 6.876936615883514922e-01, 7.202202848338683401e-01, 7.534609770949572224e-01, + 7.874575278460963461e-01, 8.222536864020499377e-01, 8.578952966595825808e-01, 8.944304405668593009e-01, + 9.319095910247435485e-01, 9.703857749817920659e-01, 1.009914747547728584e+00, 1.050555178019083150e+00, + 1.092368848786092579e+00, 1.135420868172514300e+00, 1.179779898350424466e+00, 1.225518399571142610e+00, + 1.272712892062026473e+00, 1.321444237057985065e+00, 1.371797938567245953e+00, 1.423864467614384096e+00, + 1.477739610861208115e+00, 
1.533524845679288858e+00, 1.591327743938355098e+00, 1.651262406984310076e+00, + 1.713449934511288211e+00, 1.778018930286256858e+00, 1.845106047964720870e+00, 1.914856580544951899e+00, + 1.987425097349017093e+00, 2.062976132795275283e+00, 2.141684931642916785e+00, 2.223738255848994521e+00, + 2.309335258687213796e+00, 2.398688432341103821e+00, 2.492024635808356095e+00, 2.589586210645122756e+00, + 2.691632192846832444e+00, 2.798439630014497291e+00, 2.910305013902562652e+00, 3.027545839497364963e+00, + 3.150502302946919722e+00, 3.279539151967394330e+00, 3.415047703805410611e+00, 3.557448047456550733e+00, + 3.707191448649779817e+00, 3.864762978128342125e+00, 4.030684386016531344e+00, 4.205517247588613835e+00, + 4.389866408585172458e+00, 4.584383761391930748e+00, 4.789772386950687695e+00, 5.006791101261363264e+00, + 5.236259449815274050e+00, 5.479063198337523150e+00, 5.736160373884817415e+00, 6.008587916728619858e+00, + 6.297469010648863048e+00, 6.604021167380929133e+00, 6.929565150124677837e+00, 7.275534831383860972e+00, + 7.643488092123492064e+00, 8.035118882502459288e+00, 8.452270579478188130e+00, 8.896950793641785313e+00, + 9.371347797016395173e+00, 9.877848765573446033e+00, 1.041906005527762037e+01, 1.099782975900831706e+01, + 1.161727282423952258e+01, 1.228079904848924611e+01, 1.299214431196691048e+01, 1.375540545535625881e+01, + 1.457507926620621316e+01, 1.545610610104852468e+01, 1.640391874338302925e+01, 1.742449718154208970e+01, + 1.852443008688437526e+01, 1.971098388378266494e+01, 2.099218043080961648e+01, 2.237688448013982946e+01, + 2.387490225270073820e+01, 2.549709266380430464e+01, 2.725549296232531555e+01, 2.916346081119624987e+01, + 3.123583514423284962e+01, 3.348911849136805118e+01, 3.594168387985465099e+01, 3.861400990307230737e+01, + 4.152894811329303023e+01, 4.471202755441533396e+01, 4.819180202224910174e+01, 5.200024654361558757e+01, + 5.617321062537384494e+01, 6.075093706918782079e+01, 6.577865661168003966e+01, 7.130727037357721343e+01, + 7.739413413465805794e+01, 8.410396085269633392e+01, 9.150986068496734448e+01, 9.969454113547704016e+01, + 1.087516939426018897e+02, 1.187876000643037532e+02, 1.299229897614516371e+02, 1.422952015056372537e+02, + 1.560606914665002671e+02, 1.713979549326432406e+02, 1.885109325154830073e+02, 2.076329877740125935e+02, + 2.290315594654587370e+02, 2.530136115655676467e+02, 2.799320282398896912e+02, 3.101931299766730890e+02, + 3.442655222107529892e+02, 3.826905303289378387e+02, 4.260945266207607701e+02, 4.752035175892902045e+02, + 5.308604366239058864e+02, 5.940456805372995009e+02, 6.659015428338778262e+02, 7.477613367309153870e+02, + 8.411841730471343023e+02, 9.479965698013741524e+02, 1.070342331375881840e+03, 1.210742457518582660e+03, + 1.372167241552205820e+03, 1.558123212187692722e+03, 1.772758188662716282e+03, 2.020988485411862984e+03, + 2.308653259329163157e+03, 2.642702189813684273e+03, 3.031424182869210212e+03, 3.484726676985756018e+03, + 4.014477504733973505e+03, 4.634924264049394751e+03, 5.363209949773439749e+03, 6.220008412114342803e+03, + 7.230309332853029956e+03, 8.424390216735217783e+03, 9.839022871538541787e+03, 1.151897463083113988e+04, + 1.351888098874374202e+04, 1.590558745460066947e+04, 1.876108572764816176e+04, 2.218620462393366275e+04, + 2.630526205054915357e+04, 3.127194401941711057e+04, 3.727675461256652923e+04, 4.455648280312273249e+04, + 5.340626592018903930e+04, 6.419500580388918123e+04, 7.738512642386820060e+04, 9.355796993981725963e+04, + 1.134465375820669470e+05, 1.379778272209741713e+05, 1.683277485807887053e+05, 
2.059925746120735305e+05, + 2.528822024503158254e+05, 3.114422718347725915e+05, 3.848145913435570736e+05, 4.770485864966822643e+05, + 5.933809324724740854e+05, 7.406066190351666115e+05, 9.275730471470643372e+05, 1.165840260940180415e+06, + 1.470566322118246135e+06, 1.861698899014921971e+06, 2.365584870298354495e+06, 3.017152695505764877e+06, + 3.862882573599929249e+06, 4.964864305589750358e+06, 6.406362829959736606e+06, 8.299481847261302115e+06, + 1.079575892642401854e+07, 1.410087327474604091e+07, 1.849514724418250100e+07, 2.436224419670805500e+07, + 3.222951131863941234e+07, 4.282493882385925337e+07, 5.715793394339267637e+07, 7.663437932745451635e+07, + 1.032212725498489699e+08, 1.396833991976194842e+08, 1.899251497664892740e+08, 2.594865396467505851e+08, + 3.562664742464501497e+08, 4.915825413172413471e+08, 6.817316470116958142e+08, 9.502998105202541438e+08, + 1.331598295343277538e+09, 1.875801976010459831e+09, 2.656673907709731487e+09, 3.783240215616365909e+09, + 5.417531848500136979e+09, 7.801695369892847510e+09, 1.129965368955098833e+10, 1.646149161390821924e+10, + 2.412353995736687694e+10, 3.556486895431927094e+10, 5.275345014093760519e+10, 7.873572108325378177e+10, + 1.182569020317863604e+11, 1.787549442508363461e+11, 2.719633064979986142e+11, 4.165122153119897946e+11, + 6.421781858205134197e+11, 9.968725497576275918e+11, 1.558212327122960399e+12, 2.452809984907093786e+12, + 3.888656232828140210e+12, 6.209868990509424909e+12, 9.989924216297983665e+12, 1.619158001378611351e+13, + 2.644324518669926559e+13, 4.352018847904374786e+13, 7.218884688202741709e+13, 1.206997640727349538e+14, + 2.034483722445207402e+14, 3.457553102874402920e+14, 5.925248511957505706e+14, 1.024057793713038672e+15, + 1.785174045941642162e+15, 3.139306988668494696e+15, 5.569856270174890128e+15, 9.971763353834460328e+15, + 1.801687491114883092e+16, 3.285709858322565542e+16, 6.049018540910759710e+16, 1.124375283211369572e+17, + 2.110445125952435305e+17, 4.000737007891229992e+17, 7.660849361564329309e+17, 1.482018770996176700e+18, + 2.896945433910857945e+18, 5.722790165693470493e+18, 1.142689960439921462e+19, 2.306616559984106723e+19, + 4.707857184616093863e+19, 9.717346347495342813e+19, 2.028735605622585444e+20, 4.284840254171000581e+20, + 9.157027329021623836e+20, 1.980457834766411777e+21, 4.335604886702252004e+21, 9.609258559714223995e+21, + 2.156604630608586997e+22, 4.902045909695270289e+22, 1.128749227121328467e+23, 2.633414623049930879e+23, + 6.226335684490998543e+23, 1.492205279014148921e+24, 3.625768249717590109e+24, 8.933899764961444882e+24, + 2.232786981682262383e+25, 5.661295336293986732e+25, 1.456616710298133142e+26, 3.803959852868488245e+26, + 1.008531585603036490e+27, 2.715247425129423358e+27, 7.425071766766651967e+27, 2.062860712173225003e+28, + 5.824055458799413312e+28, 1.671388836696436644e+29, 4.876830632023956392e+29, 1.447170071146107156e+30, + 4.368562208925583783e+30, 1.341873806249251338e+31, 4.195251632754338682e+31, 1.335360134828214136e+32, + 4.328681350715136340e+32, 1.429401866150319186e+33, 4.809736146227180696e+33, 1.649624114567602575e+34, + 5.768677492419801469e+34, 2.057442854162761350e+35, 7.486423509917811063e+35, 2.780052791791155051e+36, + 1.053908347660081874e+37, 4.080046334235754223e+37, 1.613553311592805373e+38, 6.520836332997615098e+38, + 2.693848186257510992e+39, 1.138002408430710800e+40, 4.917748008813924613e+40, 2.174691073191358676e+41, + 9.844523745430526502e+41, 4.563707467590116732e+42, 2.167352073708379137e+43, 1.054860193887170754e+44, + 5.263588225566847365e+44, 
2.693772458797916623e+45, 1.414506760560163074e+46, 7.624126763512016620e+46, + 4.219828148762794411e+47, 2.399387665831793264e+48, 1.402139947254117434e+49, 8.424706325525422943e+49, + 5.206918479942619318e+50, 3.311787866477716151e+51, 2.168683295509859155e+52, 1.462786368779206713e+53, + 1.016761784575838363e+54, 7.286460995145043184e+54, 5.386194237448865407e+55, 4.108917480528740640e+56, + 3.236445625945552728e+57, 2.633440652417619669e+58, 2.214702339357939268e+59, 1.926058995948268392e+60, + 1.733067740414174932e+61, 1.614307160124426969e+62, 1.557464328486352138e+63, 1.557226155197192031e+64, + 1.614473962707995344e+65, 1.736617406327386105e+66, 1.939201243451190521e+67, 2.249277732936622876e+68, + 2.711593798719765599e+69, 3.399628732048687119e+70, 4.435389696730206291e+71, 6.025566076164003981e+72, + 8.529161425383779849e+73, 1.258746322992988688e+75, 1.938112175186560210e+76, 3.115432363572610661e+77, + 5.231797674434390018e+78, 9.184930207860680757e+79, 1.686929404780378772e+81, 3.243565624474232635e+82, + 6.533812498930220075e+83, 1.379898823144620314e+85, 3.057650444842839916e+86, 7.114050545839171245e+87, + 1.739275024442258674e+89, 4.471782915853177804e+90, 1.210036789494028144e+92, 3.448828044590862359e+93, + 1.036226783750561565e+95, 3.284801914751206038e+96, 1.099514933602224638e+98, 3.889581731378242597e+99, + 1.455434287901069991e+101, 5.765729934387419019e+102, 2.420349568745475582e+104, 1.077606625929777536e+106, + 5.093346988695851845e+107, 2.558090824110323997e+109, 1.366512508719047964e+111, 7.771735800763526406e+112, + 4.710398638793014918e+114, 3.045563885587013954e+116, 2.102762552861442993e+118, 1.551937536212596136e+120, + 1.225676354426075970e+122, 1.036950946169703711e+124, 9.407885268970827717e+125, 9.163369107785093171e+127, + 9.592531095671168926e+129, 1.080486293361823875e+132, 1.311034829557782450e+134, 1.715642975932639188e+136, + 2.424231742707881878e+138, 3.703231223333127919e+140, 6.123225027409988902e+142, 1.097271040771196765e+145, + 2.133693643241295977e+147, 4.508099184895777328e+149, 1.036252806686291189e+152, }; + +__constant__ double* m_abscissas_double[8] = { + m_abscissas_double_1, + m_abscissas_double_2, + m_abscissas_double_3, + m_abscissas_double_4, + m_abscissas_double_5, + m_abscissas_double_6, + m_abscissas_double_7, + m_abscissas_double_8, +}; + +__constant__ double m_weights_double_1[6] = + { 7.868241604839621507e+00, 8.805163880733011116e+02, 5.396278323520705668e+07, 8.876511896968161317e+19, + 2.432791879269225553e+52, 6.399713512080202911e+139, }; + +__constant__ double m_weights_double_2[6] = + { 2.398524276302635218e+00, 5.244596423726681022e+01, 6.457887819598201760e+04, 2.509985242511374506e+12, + 1.774029269327138701e+32, 2.781406115983097314e+85, }; + +__constant__ double m_weights_double_3[12] = + { 1.749369583108386852e+00, 3.979658981934607813e+00, 1.848514598574449570e+01, 1.864880718932067988e+02, + 5.974205695263265855e+03, 1.270412635144623341e+06, 6.164193014295984071e+09, 5.230850031811222530e+15, + 2.226260929943369774e+25, 1.199931102042181592e+41, 7.470602144275146214e+66, 1.814465860528410676e+109, }; + +__constant__ double m_weights_double_4[24] = + { 1.613859062188366173e+00, 1.997767291869673262e+00, 3.020231979908834220e+00, 5.477641843859057761e+00, + 1.179660916492671672e+01, 3.035504848518598294e+01, 9.584421793794920860e+01, 3.893870238229992076e+02, + 2.179193250357911344e+03, 1.839208123964132852e+04, 2.632120612599856167e+05, 7.427296507169468210e+06, + 5.015875648341232356e+08, 
1.039610867241544113e+11, 9.100328911818091977e+13, 5.068651163890231571e+17, + 3.039966520714902616e+22, 3.857740194672007962e+28, 2.465542763666581087e+36, 2.416439449167799461e+46, + 1.517091553926604149e+59, 3.825043412021411380e+75, 4.089582396821598640e+96, 3.823775894295564050e+123, }; + +__constant__ double m_weights_double_5[49] = + { 1.581465959536694744e+00, 1.669149910438534746e+00, 1.857523188595005770e+00, 2.175662623626994120e+00, + 2.675901375211020564e+00, 3.447738682498791744e+00, 4.643946540355464126e+00, 6.530204496574248616e+00, + 9.582285015566804961e+00, 1.468361407515440960e+01, 2.354449548740987533e+01, 3.963527273305166705e+01, + 7.037635206267538547e+01, 1.325880124784838868e+02, 2.669625649541569172e+02, 5.793749198508472676e+02, + 1.368691928321303605e+03, 3.559435721533130554e+03, 1.032186677270763318e+04, 3.386621302858741487e+04, + 1.278166259840246830e+05, 5.654082513926693098e+05, 2.994462044781721833e+06, 1.944975023421914947e+07, + 1.592193007690560588e+08, 1.694288818617459913e+09, 2.427156182311303271e+10, 4.870317848199455490e+11, + 1.431819656229181793e+13, 6.489471523099301256e+14, 4.803757752508989106e+16, 6.200096361305331541e+18, + 1.502568562439914899e+21, 7.436061367189688251e+23, 8.264761218677928603e+26, 2.297735027897804345e+30, + 1.805449779569534997e+34, 4.604472360199061931e+38, 4.458371212030626854e+43, 1.957638261114809309e+49, + 4.767368137162500764e+55, 8.088820139476721285e+62, 1.238260897349286357e+71, 2.292272505278842062e+80, + 7.151392373749193549e+90, 5.476714850156044431e+102, 1.576655618370700681e+116, 2.765448595957851958e+131, + 5.108051255283132673e+148, }; + +__constant__ double m_weights_double_6[98] = + { 1.573457773573108386e+00, 1.594892755038663787e+00, 1.638536515530234742e+00, 1.705980408212213620e+00, + 1.799724394608737275e+00, 1.923322854425656307e+00, 2.081597373313268178e+00, 2.280934883790070511e+00, + 2.529697852387704655e+00, 2.838784782552951185e+00, 3.222395745020980612e+00, 3.699081358854235112e+00, + 4.293188274330526800e+00, 5.036865356322330076e+00, 5.972871140910932199e+00, 7.158538424311077564e+00, + 8.671427800892076385e+00, 1.061747360297922326e+01, 1.314285002260235600e+01, 1.645145625668428040e+01, + 2.083099449998189069e+01, 2.669235989791640190e+01, 3.462993514791378189e+01, 4.551518362653662579e+01, + 6.064408087764392116e+01, 8.197296917485846798e+01, 1.125020468081652564e+02, 1.569096552844714123e+02, + 2.226204347868638276e+02, 3.216385489504077755e+02, 4.737574505945461739e+02, 7.122994548146997637e+02, + 1.094609652686376553e+03, 1.721697789176049576e+03, 2.775924909253835146e+03, 4.595230066268149347e+03, + 7.823427586641573672e+03, 1.372357435269105405e+04, 2.485188961645119553e+04, 4.655538745425972783e+04, + 9.041766782135686884e+04, 1.824843964862728392e+05, 3.836800264094614027e+05, 8.426271970245168026e+05, + 1.938432574158782634e+06, 4.685112849356485528e+06, 1.193528667218607927e+07, 3.215643752247989316e+07, + 9.196008928386600386e+07, 2.802223178457559964e+08, 9.136110825267458886e+08, 3.200910900783148591e+09, + 1.210765264234723689e+10, 4.969024745093101808e+10, 2.224315751863855216e+11, 1.092125344449313660e+12, + 5.916882980019919359e+12, 3.559743438494577249e+13, 2.394353652945465191e+14, 1.813551073517501917e+15, + 1.558736706166165738e+16, 1.532714875555114333e+17, 1.739274776190789212e+18, 2.298841216802216313e+19, + 3.574030698837762664e+20, 6.604899705451419080e+21, 1.467155879591820659e+23, 3.964094964398509381e+24, + 1.319342840595348793e+26, 
5.482251971340400742e+27, 2.885137894723827518e+29, 1.952539840765392110e+31, + 1.727051489032222797e+33, 2.031343507095439396e+35, 3.236074146972599980e+37, 7.120487412983497200e+39, + 2.209552707411017265e+42, 9.886282647791384648e+44, 6.530514048788273529e+47, 6.530706672481546528e+50, + 1.015518807431281951e+54, 2.526366773162394510e+57, 1.036450519906790297e+61, 7.241966032627135861e+64, + 8.919402520769714938e+68, 2.008463619152992905e+73, 8.596914764830260020e+77, 7.290599546829495220e+82, + 1.280199563216419112e+88, 4.878349285603201150e+93, 4.240828248064127940e+99, 8.869771764721598720e+105, + 4.723342575741417669e+112, 6.802035963326188581e+119, 2.824531180990009549e+127, 3.621049216745982252e+135, + 1.541270150334942520e+144, 2.353376995174362785e+153, }; + +__constant__ double m_weights_double_7[196] = + { 1.571461316550783294e+00, 1.576790166316938345e+00, 1.587495640370383316e+00, 1.603673956341370210e+00, + 1.625471125457493943e+00, 1.653085011915939302e+00, 1.686768142525911236e+00, 1.726831323537516202e+00, + 1.773648138667236602e+00, 1.827660421478661448e+00, 1.889384817044018196e+00, 1.959420572855037091e+00, + 2.038458728047908923e+00, 2.127292904083847225e+00, 2.226831940199076941e+00, 2.338114664555130296e+00, + 2.462327148722991304e+00, 2.600822860927085164e+00, 2.755146214814554359e+00, 2.927060108424483555e+00, + 3.118578166240921951e+00, 3.332002540339506630e+00, 3.569968300410740276e+00, 3.835495653996447262e+00, + 4.132051496512934885e+00, 4.463622106699067881e+00, 4.834799191008006557e+00, 5.250881957765679608e+00, + 5.717998490875333124e+00, 6.243250421598568105e+00, 6.834885801226541839e+00, 7.502506202789340802e+00, + 8.257315484493544201e+00, 9.112419405864642634e+00, 1.008318749543997758e+01, 1.118769134993865202e+01, + 1.244723705914106881e+01, 1.388701390605507587e+01, 1.553688715915900190e+01, 1.743237000680942831e+01, + 1.961581894823993424e+01, 2.213790886354273806e+01, 2.505945934677137610e+01, 2.845370377742137561e+01, + 3.240911845969524834e+01, 3.703296289480230161e+01, 4.245572644746267911e+01, 4.883673480337985582e+01, + 5.637124640586975420e+01, 6.529947092752610340e+01, 7.591807755694122837e+01, 8.859494252391663822e+01, + 1.037881295005788124e+02, 1.220704263969226746e+02, 1.441612098131200535e+02, 1.709680191245773511e+02, + 2.036410593843575570e+02, 2.436450058708723643e+02, 2.928540812182076105e+02, 3.536786019152253392e+02, + 4.292343083967296939e+02, 5.235701840488733027e+02, 6.419766898003024575e+02, 7.914052083668759283e+02, + 9.810422089081931637e+02, 1.223099994999740393e+03, 1.533912555427112127e+03, 1.935464013605830339e+03, + 2.457534549912886852e+03, 3.140733731623635519e+03, 4.040818188564651898e+03, 5.234881599712225681e+03, + 6.830294457607329226e+03, 8.977713228649887143e+03, 1.189015920967326839e+04, 1.587122387044346962e+04, + 2.135711106445789331e+04, 2.897983705189681437e+04, 3.966306726795547950e+04, 5.476875193750000787e+04, + 7.632356539388055680e+04, 1.073719149754976951e+05, 1.525316674555574152e+05, 2.188778434744216586e+05, + 3.173624496019295608e+05, 4.651201525869328462e+05, 6.892537656280580572e+05, 1.033119885120019982e+06, + 1.566887981043252499e+06, 2.405492027026531795e+06, 3.739528964815910340e+06, 5.889121154895580032e+06, + 9.399046351922342030e+06, 1.520903276129653518e+07, 2.496287187293576168e+07, 4.157759259963074840e+07, + 7.030705366950267312e+07, 1.207598558452493366e+08, 2.107882509464846833e+08, 3.741047199023457864e+08, + 6.754494594987415572e+08, 1.241316740415880537e+09, 
2.323310032649552862e+09, 4.431176019026625759e+09, + 8.617446487400900130e+09, 1.709836906604031513e+10, 3.463574521880171339e+10, 7.167607123799270726e+10, + 1.516347620910054079e+11, 3.281729323238950526e+11, 7.271102600298280790e+11, 1.650499552378780378e+12, + 3.841338149508803917e+12, 9.173744267785176575e+12, 2.249901946357519979e+13, 5.671535089900611731e+13, + 1.470742250307697019e+14, 3.927012518464311775e+14, 1.080639977391212820e+15, 3.067671466720475189e+15, + 8.992386789198328428e+15, 2.724722536524592111e+16, 8.542946122263389258e+16, 2.774613718725574755e+17, + 9.345299479382029121e+17, 3.267996122987731882e+18, 1.187914433455468315e+19, 4.494053408418564214e+19, + 1.771706652195486743e+20, 7.288102552885931527e+20, 3.132512430816625349e+21, 1.408743767951073110e+22, + 6.638294268236060414e+22, 3.282543608403565013e+23, 1.705920098038394064e+24, 9.332259385148524285e+24, + 5.382727175874888312e+25, 3.278954235122093249e+26, 2.113191697957458099e+27, 1.443411041499643040e+28, + 1.046864394654982423e+29, 8.077319226958905700e+29, 6.643146963432616277e+30, 5.835670121359986260e+31, + 5.486890296790230798e+32, 5.533726968508261614e+33, 5.999734996418352834e+34, 7.009176119466122569e+35, + 8.844061966424597499e+36, 1.208226860869605961e+38, 1.791648514311063338e+39, 2.891313916713205762e+40, + 5.091457860211527298e+41, 9.810630588402496553e+42, 2.074441239147378860e+44, 4.827650116937700540e+45, + 1.240287939111549029e+47, 3.528782858644784616e+48, 1.115449490471696659e+50, 3.930510643328196314e+51, + 1.549243712957852337e+53, 6.854998238041301002e+54, 3.417479961583207704e+56, 1.926905498641079990e+58, + 1.233580963004919450e+60, 9.002819902898076915e+61, 7.521415141253441645e+63, 7.224277554900578993e+65, + 8.012832830535078610e+67, 1.030999620286380369e+70, 1.546174957076748679e+72, 2.715803772613248694e+74, + 5.615089920571746438e+76, 1.373667859345343337e+79, 3.997541020769625126e+81, 1.391500589339800087e+84, + 5.826693844912022892e+86, 2.952274820929549096e+89, 1.821023061478466282e+92, 1.375973022137941526e+95, + 1.281852367543412945e+98, 1.482130127201990503e+101, 2.141574273792435314e+104, 3.894495540947112380e+107, + 8.978646362580102961e+110, 2.644131589807244050e+114, 1.002403539841913834e+118, 4.931412804903905259e+121, + 3.174401112435865044e+125, 2.696624001761892390e+129, 3.049799322320447166e+133, 4.634041526818687785e+137, + 9.548983134803106512e+141, 2.694404866192089829e+146, 1.051502720036395325e+151, 5.734170640626244955e+155, }; + +__constant__ double m_weights_double_8[391] = + { 1.570962550997832611e+00, 1.572292902367211961e+00, 1.574956581912666755e+00, 1.578959553636163985e+00, + 1.584310789563614305e+00, 1.591022301117035107e+00, 1.599109181186160337e+00, 1.608589657109067468e+00, + 1.619485154826419743e+00, 1.631820374530739318e+00, 1.645623378191125679e+00, 1.660925689395424109e+00, + 1.677762406016463717e+00, 1.696172326277082973e+00, 1.716198088860732467e+00, 1.737886327791014562e+00, + 1.761287842885152410e+00, 1.786457786673686420e+00, 1.813455868772335587e+00, 1.842346578792652542e+00, + 1.873199428986627521e+00, 1.906089217937612619e+00, 1.941096316736779451e+00, 1.978306979221816566e+00, + 2.017813678003844337e+00, 2.059715468170813895e+00, 2.104118380732327493e+00, 2.151135848063375554e+00, + 2.200889163814591418e+00, 2.253507979986114202e+00, 2.309130844113053375e+00, 2.367905779785113334e+00, + 2.429990914023652954e+00, 2.495555155369085590e+00, 2.564778926893134514e+00, 2.637854958747451684e+00, + 2.714989145296268067e+00, 
2.796401472360280536e+00, 2.882327020626578700e+00, 2.973017051860293803e+00, + 3.068740185193628238e+00, 3.169783671473487386e+00, 3.276454774427328601e+00, 3.389082268266156098e+00, + 3.508018062292869136e+00, 3.633638964133530274e+00, 3.766348594369884204e+00, 3.906579466636309289e+00, + 4.054795248667541120e+00, 4.211493221360917802e+00, 4.377206954666462219e+00, 4.552509221059946388e+00, + 4.738015169510782826e+00, 4.934385785253587887e+00, 5.142331663338191074e+00, 5.362617126899976224e+00, + 5.596064724397100194e+00, 5.843560143744373307e+00, 6.106057585381734693e+00, 6.384585640900671436e+00, + 6.680253728973824449e+00, 6.994259146058412709e+00, 7.327894795748901060e+00, 7.682557667824588764e+00, + 8.059758146071137270e+00, 8.461130232962342889e+00, 8.888442789395671080e+00, 9.343611899025485155e+00, + 9.828714479494622022e+00, 1.034600327721380625e+01, 1.089792339849122916e+01, 1.148713054801325790e+01, + 1.211651116619788555e+01, 1.278920468010096321e+01, 1.350862810871281096e+01, 1.427850329305334421e+01, + 1.510288705493181327e+01, 1.598620462612703196e+01, 1.693328673269081128e+01, 1.794941076780000506e+01, + 1.904034654190823159e+01, 2.021240716182964334e+01, 2.147250566192247370e+01, 2.282821809199713505e+01, + 2.428785385941680425e+01, 2.586053422878117785e+01, 2.755628000354674426e+01, 2.938610955221109564e+01, + 3.136214849990951329e+01, 3.349775258749912582e+01, 3.580764540799625468e+01, 3.830807296872530167e+01, + 4.101697730155473447e+01, 4.395419165876113623e+01, 4.714166019494196927e+01, 5.060368545366659226e+01, + 5.436720746019445252e+01, 5.846211877912138439e+01, 6.292162054058128784e+01, 6.778262518512416663e+01, + 7.308621254265223015e+01, 7.887814686488147292e+01, 8.520946359734658334e+01, 9.213713603387774717e+01, + 9.972483357670754649e+01, 1.080437851679046426e+02, 1.171737636088621692e+02, 1.272042089988687372e+02, + 1.382355124664102373e+02, 1.503804848151483311e+02, 1.637660387526102742e+02, 1.785351181233383403e+02, + 1.948489131607280604e+02, 2.128894073598352670e+02, 2.328623093447990790e+02, 2.550004322843281994e+02, + 2.795675942672445782e+02, 3.068631259124280934e+02, 3.372270867451200874e+02, 3.710463099965576255e+02, + 4.087614170466174911e+02, 4.508749684194593670e+02, 4.979609488959773491e+02, 5.506758209385785877e+02, + 6.097714244663179092e+02, 6.761100535726473685e+02, 7.506821038741422446e+02, 8.346267600518081192e+02, + 9.292562845315541998e+02, 1.036084578498234728e+03, 1.156860819661897657e+03, 1.293609142453808600e+03, + 1.448675521854205144e+03, 1.624783259532197615e+03, 1.825098759915318560e+03, 2.053309635972617554e+03, + 2.313717614494777200e+03, 2.611349236640186999e+03, 2.952087994093624299e+03, 3.342832332560548180e+03, + 3.791684927756595099e+03, 4.308179838716318955e+03, 4.903555624570201673e+03, 5.591084343634811452e+03, + 6.386468625571246341e+03, 7.308321829412979440e+03, 8.378749812799703561e+03, 9.624057218749638059e+03, + 1.107560666191146008e+04, 1.277086605445904388e+04, 1.475468792019489452e+04, 1.708087537417066343e+04, + 1.981410309695485051e+04, 2.303227888204754908e+04, 2.682945317928632535e+04, 3.131941178398428200e+04, + 3.664012209706997997e+04, 4.295924836668690170e+04, 5.048100882639843572e+04, 5.945472133180055290e+04, + 7.018547875172689579e+04, 8.304751726175694003e+04, 9.850099805053575446e+04, 1.171131266261766060e+05, + 1.395847982160589845e+05, 1.667843016393077556e+05, 1.997900626520524686e+05, 2.399449946032992187e+05, + 2.889257939838013232e+05, 3.488315309194304548e+05, 4.222972201496778447e+05, 
5.126398246369253619e+05, + 6.240464876221989792e+05, 7.618179073233615941e+05, 9.326839300224119257e+05, 1.145214007774297539e+06, + 1.410352646274233119e+06, 1.742120041875863385e+06, 2.158531716934287014e+06, 2.682809410126426731e+06, + 3.344980563595418861e+06, 4.183997972337706048e+06, 5.250558008165501752e+06, 6.610860174141680988e+06, + 8.351639423967558693e+06, 1.058692532393929900e+07, 1.346715235106239409e+07, 1.719148271024263021e+07, + 2.202453449027701694e+07, 2.831917301724337797e+07, 3.654767820268344932e+07, 4.734452657230626106e+07, + 6.156534063509513873e+07, 8.036843026897869248e+07, 1.053280284359690289e+08, 1.385921689084126286e+08, + 1.831036985925683524e+08, 2.429109457458640820e+08, 3.236062393759667463e+08, 4.329475218599986663e+08, + 5.817432967962929479e+08, 7.851179789388191786e+08, 1.064329197627075307e+09, 1.449389582912945485e+09, + 1.982866469377991849e+09, 2.725414314698094324e+09, 3.763867964111621444e+09, 5.223138814950990937e+09, + 7.283785810644397704e+09, 1.020809642381158743e+10, 1.437899318470510521e+10, 2.035836812543633578e+10, + 2.897499827080027444e+10, 4.145773751645494878e+10, 5.963837683872426287e+10, 8.626228483915530800e+10, + 1.254667045389825180e+11, 1.835212982264913186e+11, 2.699812207400151604e+11, 3.994928452151922954e+11, + 5.946380558701434550e+11, 8.904409967424091107e+11, 1.341551941677775838e+12, 2.033768550332151892e+12, + 3.102627959875753214e+12, 4.763598321705862063e+12, 7.361420360560813584e+12, 1.145126961456557423e+13, + 1.793314186996273926e+13, 2.827585501285792232e+13, 4.489297053678444669e+13, 7.177802872658499571e+13, + 1.155855098545820625e+14, 1.874833886367883093e+14, 3.063510356402174454e+14, 5.043400653005970242e+14, + 8.366163396892429890e+14, 1.398556351640947289e+15, 2.356335749516164682e+15, 4.001765167382637456e+15, + 6.851375128404941445e+15, 1.182690111761543990e+16, 2.058673527013806443e+16, 3.613968784314904633e+16, + 6.399112184394213551e+16, 1.143016185628376923e+17, 2.059881383915666443e+17, 3.745846788353680914e+17, + 6.874443034683149068e+17, 1.273407643613485314e+18, 2.381241916829895366e+18, 4.495835617307108399e+18, + 8.571442024901952701e+18, 1.650443584181656965e+19, 3.210100352421317851e+19, 6.307780124442703091e+19, + 1.252404031157661279e+20, 2.513005295649985394e+20, 5.096776255690838436e+20, 1.045019200016673046e+21, + 2.166476479260878466e+21, 4.542138145678395463e+21, 9.632082324449137128e+21, 2.066386536688254528e+22, + 4.485529785554428251e+22, 9.853879573610977508e+22, 2.191158874464374408e+23, 4.932835964390971668e+23, + 1.124501529971774363e+24, 2.596269136156756008e+24, 6.072292938313625501e+24, 1.438989066308003836e+25, + 3.455841956406570469e+25, 8.412655191713576490e+25, 2.076289061650816510e+26, 5.196515024640220322e+26, + 1.319173194089644043e+27, 3.397455895980380794e+27, 8.879057454438503591e+27, 2.355272361492064126e+28, + 6.342762007722624824e+28, 1.734531093990859705e+29, 4.817893170606830871e+29, 1.359597346490148232e+30, + 3.898969689906500392e+30, 1.136542986529989936e+31, 3.368450043991780017e+31, 1.015304084709817260e+32, + 3.113144376221918237e+32, 9.713072739730140403e+32, 3.084517643581725946e+33, 9.972682139820497284e+33, + 3.283625052288491586e+34, 1.101378785390827536e+35, 3.764333367592714297e+35, 1.311403465938242926e+36, + 4.658135710682813672e+36, 1.687517347470511392e+37, 6.237053685018323490e+37, 2.352571314427744869e+38, + 9.058938240219699936e+38, 3.562249097611136071e+39, 1.430959291578558210e+40, 5.873974584984375049e+40, + 2.464828549811283787e+41, 
1.057649203090855628e+42, 4.642475639281078035e+42, 2.085287118272421779e+43,
+      9.588439985186632177e+43, 4.514982011246092280e+44, 2.177974048341973204e+45, 1.076720976822900458e+46,
+      5.457267432929085589e+46, 2.836869270455781134e+47, 1.513103201392011626e+48, 8.283974667225617075e+48,
+      4.657239491995971344e+49, 2.689796370712836937e+50, 1.596597846911970388e+51, 9.744154538256586629e+51,
+      6.117238394843313065e+52, 3.952049650585241827e+53, 2.628701592074258213e+54, 1.800990196502679393e+55,
+      1.271554462563068383e+56, 9.255880104477760711e+56, 6.949737920133919393e+57, 5.385167200769965621e+58,
+      4.308493668102978774e+59, 3.560951557542178371e+60, 3.041888528384649992e+61, 2.687094441930837189e+62,
+      2.455920538900000855e+63, 2.323648254168641537e+64, 2.277129741584892331e+65, 2.312633552913224734e+66,
+      2.435407592981291129e+67, 2.660910388822465246e+68, 3.018105943423533920e+69, 3.555823489510192503e+70,
+      4.354188877793849013e+71, 5.544975795511813315e+72, 7.348276481909886336e+73, 1.013998025722423261e+75,
+      1.457911462244607943e+76, 2.185488876819505295e+77, 3.418022153286623008e+78, 5.580843920601835728e+79,
+      9.519586502799733908e+80, 1.697573578247197786e+82, 3.166906670990180014e+83, 6.185099106418675430e+84,
+      1.265541134386934377e+86, 2.714828965877756899e+87, 6.110386802964494082e+88, 1.444054086171083239e+90,
+      3.586083726638388165e+91, 9.365231868063239600e+92, 2.574080116205122449e+94, 7.452134689862302719e+95,
+      2.274309903836169819e+97, 7.323011134121164749e+98, 2.489816421737932462e+100, 8.946533386359281588e+101,
+      3.400401372391165979e+103, 1.368288186208928217e+105, 5.834277489829591931e+106, 2.638486937672383424e+108,
+      1.266728882767139521e+110, 6.462225178314182803e+111, 3.506432320607573604e+113, 2.025608933943268165e+115,
+      1.247041677084784707e+117, 8.189865188405279038e+118, 5.743610894406099965e+120, 4.305808934084489763e+122,
+      3.454156966079496755e+124, 2.968316601530352737e+126, 2.735456242372183592e+128, 2.706317176690077847e+130,
+      2.877679916342060385e+132, 3.292412878268106390e+134, 4.057840961953725969e+136, 5.393783049105737324e+138,
+      7.741523901672235406e+140, 1.201209962310668456e+143, 2.017456079556807301e+145, 3.672176623483062526e+147,
+      7.253163798058577630e+149, 1.556591535302570570e+152, 3.634399832790394885e+154, };
+
+__constant__ double* m_weights_double[8] = {
+    m_weights_double_1,
+    m_weights_double_2,
+    m_weights_double_3,
+    m_weights_double_4,
+    m_weights_double_5,
+    m_weights_double_6,
+    m_weights_double_7,
+    m_weights_double_8
+};
+__constant__ boost::math::size_t float_coefficients_size[8] = {4, 4, 8, 16, 32, 65, 129, 259};
+
+__constant__ boost::math::size_t double_coefficients_size[8] = {6, 6, 12, 24, 49, 98, 196, 391};
+
+template <typename T>
+struct coefficients_selector;
+
+template <>
+struct coefficients_selector<float>
+{
+    __device__ static const auto abscissas() { return m_abscissas_float; }
+    __device__ static const auto weights() { return m_weights_float; }
+    __device__ static const auto size() { return float_coefficients_size; }
+};
+
+template <>
+struct coefficients_selector<double>
+{
+    __device__ static const auto abscissas() { return m_abscissas_double; }
+    __device__ static const auto weights() { return m_weights_double; }
+    __device__ static const auto size() { return double_coefficients_size; }
+};
+
+template <class F, class Real, class Policy = policies::policy<> >
+__device__ auto sinh_sinh_integrate_impl(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
+{
+    BOOST_MATH_STD_USING
+    using boost::math::constants::half;
+    using boost::math::constants::half_pi;
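// [Illustrative note, not part of the patch] The public device entry point for
// this routine is boost::math::quadrature::sinh_sinh_integrate, added in
// sinh_sinh.hpp further down in this patch; it forwards here. A minimal
// kernel-side usage sketch, assuming a double-precision functor (the kernel
// name, tolerance, and integrand are invented for the example):
//
//     __global__ void gaussian_kernel(double* out)
//     {
//         const auto f = [](double x) { return exp(-x * x); };
//         double error, L1;
//         boost::math::size_t levels;
//         // Converges to sqrt(pi) ~= 1.7724538509
//         *out = boost::math::quadrature::sinh_sinh_integrate(f, 1e-9, &error, &L1, &levels);
//     }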
+    using boost::math::size_t;
+
+    constexpr auto function = "boost::math::quadrature::sinh_sinh<%1%>::integrate";
+
+    using K = decltype(f(static_cast<Real>(0)));
+    static_assert(!boost::math::is_integral<K>::value,
+                  "The return type cannot be integral, it must be either a real or complex floating point type.");
+
+    K y_max = f(boost::math::tools::max_value<Real>());
+
+    if(abs(y_max) > boost::math::tools::epsilon<Real>())
+    {
+        return static_cast<K>(policies::raise_domain_error(function,
+            "The function you are trying to integrate does not go to zero at infinity, and instead evaluates to %1%", y_max, Policy()));
+    }
+
+    K y_min = f(-boost::math::tools::max_value<Real>());
+
+    if(abs(y_min) > boost::math::tools::epsilon<Real>())
+    {
+        return static_cast<K>(policies::raise_domain_error(function,
+            "The function you are trying to integrate does not go to zero at -infinity, and instead evaluates to %1%", y_min, Policy()));
+    }
+
+    // Get the party started with two estimates of the integral:
+    const auto m_abscissas = coefficients_selector<Real>::abscissas();
+    const auto m_weights = coefficients_selector<Real>::weights();
+    const auto m_size = coefficients_selector<Real>::size();
+
+    K I0 = f(0)*half_pi<Real>();
+    Real L1_I0 = abs(I0);
+    for(size_t i = 0; i < m_size[0]; ++i)
+    {
+        Real x = m_abscissas[0][i];
+        K yp = f(x);
+        K ym = f(-x);
+        I0 += (yp + ym)*m_weights[0][i];
+        L1_I0 += (abs(yp)+abs(ym))*m_weights[0][i];
+    }
+
+    K I1 = I0;
+    Real L1_I1 = L1_I0;
+    for (size_t i = 0; i < m_size[1]; ++i)
+    {
+        Real x = m_abscissas[1][i];
+        K yp = f(x);
+        K ym = f(-x);
+        I1 += (yp + ym)*m_weights[1][i];
+        L1_I1 += (abs(yp) + abs(ym))*m_weights[1][i];
+    }
+
+    I1 *= half<Real>();
+    L1_I1 *= half<Real>();
+    Real err = abs(I0 - I1);
+
+    size_t i = 2;
+    // Rows 0-7 of the coefficient tables above are the only precomputed
+    // levels, so the refinement loop must stop after row 7:
+    for(; i < 8U; ++i)
+    {
+        I0 = I1;
+        L1_I0 = L1_I1;
+
+        I1 = half<Real>()*I0;
+        L1_I1 = half<Real>()*L1_I0;
+        Real h = static_cast<Real>(1) / static_cast<Real>(1 << i);
+        K sum = 0;
+        Real absum = 0;
+
+        Real abterm1 = 1;
+        Real eps = boost::math::tools::epsilon<Real>()*L1_I1;
+
+        auto abscissa_row = m_abscissas[i];
+        auto weight_row = m_weights[i];
+
+        for(size_t j = 0; j < m_size[i]; ++j)
+        {
+            Real x = abscissa_row[j];
+            K yp = f(x);
+            K ym = f(-x);
+            sum += (yp + ym)*weight_row[j];
+            Real abterm0 = (abs(yp) + abs(ym))*weight_row[j];
+            absum += abterm0;
+
+            // We require two consecutive terms to be < eps in case we hit a zero of f.
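// [Illustrative note, not part of the patch] Why two consecutive terms: the
// sinh-sinh abscissas grow double-exponentially, so once a term drops below
// eps the remaining tail is negligible -- unless the integrand merely crossed
// a zero at that particular node. Requiring both abterm0 and abterm1 to be
// below eps, and only once x > 100 (i.e. well into the tail), keeps an
// isolated zero of f from truncating the row too early.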
+            if (x > static_cast<Real>(100) && abterm0 < eps && abterm1 < eps)
+            {
+                break;
+            }
+            abterm1 = abterm0;
+        }
+
+        I1 += sum*h;
+        L1_I1 += absum*h;
+        err = abs(I0 - I1);
+
+        if (!(boost::math::isfinite)(L1_I1))
+        {
+            constexpr auto err_msg = "The sinh_sinh quadrature evaluated your function at a singular point, leading to the value %1%.\n"
+                                     "sinh_sinh quadrature cannot handle singularities in the domain.\n"
+                                     "If you are sure your function has no singularities, please submit a bug against boost.math\n";
+            return static_cast<K>(policies::raise_evaluation_error(function, err_msg, I1, Policy()));
+        }
+        if (err <= tolerance*L1_I1)
+        {
+            break;
+        }
+    }
+
+    if (error)
+    {
+        *error = err;
+    }
+
+    if (L1)
+    {
+        *L1 = L1_I1;
+    }
+
+    if (levels)
+    {
+        *levels = i;
+    }
+
+    return I1;
+}
+
+} // Namespace detail
+} // Namespace quadrature
+} // Namespace math
+} // Namespace boost
+
+#endif // BOOST_MATH_ENABLE_CUDA
+
+#endif // BOOST_MATH_QUADRATURE_DETAIL_SINH_SINH_DETAIL_HPP
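The exp_sinh.hpp hunk that follows adds a device-callable free-function interface. Its job is mostly limit normalisation: every half-infinite range is reduced to the canonical [0, inf) case, by shifting (t -> t + a) or reflecting (t -> b - t), before the detail implementation runs. A minimal usage sketch (kernel name, integrand, and tolerance are invented for the example; the exact value of this integral is sqrt(pi)/2):

    __global__ void exp_sinh_demo(double* out)
    {
        // Integrate exp(-t^2) over [0, inf) with the interface added below.
        const auto f = [](double t) { return exp(-t * t); };
        double error, L1;
        boost::math::size_t levels;
        *out = boost::math::quadrature::exp_sinh_integrate(
            f, 0.0, boost::math::tools::max_value<double>(),  // limits: [0, inf)
            1e-9, &error, &L1, &levels);                      // tolerance + diagnostics
    }

Passing a = 0 takes the fast path with no wrapping lambda; a finite non-zero limit costs one extra level of functor indirection.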
diff --git a/include/boost/math/quadrature/exp_sinh.hpp b/include/boost/math/quadrature/exp_sinh.hpp
index f28493737e..d3148e0c0a 100644
--- a/include/boost/math/quadrature/exp_sinh.hpp
+++ b/include/boost/math/quadrature/exp_sinh.hpp
@@ -15,11 +15,15 @@
 #ifndef BOOST_MATH_QUADRATURE_EXP_SINH_HPP
 #define BOOST_MATH_QUADRATURE_EXP_SINH_HPP

+#include
+#include
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include
 #include
 #include
 #include
-#include

 namespace boost{ namespace math{ namespace quadrature {

@@ -98,4 +102,79 @@ auto exp_sinh<Real, Policy>::integrate(const F& f, Real tolerance, Real* error,

 }}}

-#endif
+
+#endif // BOOST_MATH_HAS_NVRTC
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+
+#include
+#include
+#include
+#include
+#include
+
+namespace boost {
+namespace math {
+namespace quadrature {
+
+template <class F, class Real, class Policy = policies::policy<> >
+__device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
+{
+    BOOST_MATH_STD_USING
+
+    using K = decltype(f(a));
+    static_assert(!boost::math::is_integral<K>::value,
+                  "The return type cannot be integral, it must be either a real or complex floating point type.");
+
+    constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate";
+
+    // Neither limit may be a NaN:
+    if((boost::math::isnan)(a) || (boost::math::isnan)(b))
+    {
+        return static_cast<K>(policies::raise_domain_error(function, "NaN supplied as one limit of integration - sorry I don't know what to do", a, Policy()));
+    }
+    // Right limit is infinite:
+    if ((boost::math::isfinite)(a) && (b >= boost::math::tools::max_value<Real>()))
+    {
+        // If a = 0, don't use an additional level of indirection:
+        if (a == static_cast<Real>(0))
+        {
+            return detail::exp_sinh_integrate_impl(f, tolerance, error, L1, levels);
+        }
+        const auto u = [&](Real t)->K { return f(t + a); };
+        return detail::exp_sinh_integrate_impl(u, tolerance, error, L1, levels);
+    }
+
+    if ((boost::math::isfinite)(b) && a <= -boost::math::tools::max_value<Real>())
+    {
+        const auto u = [&](Real t)->K { return f(b-t);};
+        return detail::exp_sinh_integrate_impl(u, tolerance, error, L1, levels);
+    }
+
+    // Infinite limits:
+    if ((a <= -boost::math::tools::max_value<Real>()) && (b >= boost::math::tools::max_value<Real>()))
+    {
+        return static_cast<K>(policies::raise_domain_error(function, "Use sinh_sinh quadrature for integration over the whole real line; exp_sinh is for half infinite integrals.", a, Policy()));
+    }
+    // If we get to here then both ends must necessarily be finite:
+    return static_cast<K>(policies::raise_domain_error(function, "Use tanh_sinh quadrature for integration over finite domains; exp_sinh is for half infinite integrals.", a, Policy()));
+}
+
+template <class F, class Real, class Policy = policies::policy<> >
+__device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
+{
+    BOOST_MATH_STD_USING
+    constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate";
+    if (abs(tolerance) > 1) {
+        return policies::raise_domain_error(function, "The tolerance provided (%1%) is unusually large; did you confuse it with a domain bound?", tolerance, Policy());
+    }
+    return detail::exp_sinh_integrate_impl(f, tolerance, error, L1, levels);
+}
+
+} // namespace quadrature
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_ENABLE_CUDA
+
+#endif // BOOST_MATH_QUADRATURE_EXP_SINH_HPP
diff --git a/include/boost/math/quadrature/sinh_sinh.hpp b/include/boost/math/quadrature/sinh_sinh.hpp
index ed958eb8d2..7aabcb4376 100644
--- a/include/boost/math/quadrature/sinh_sinh.hpp
+++ b/include/boost/math/quadrature/sinh_sinh.hpp
@@ -1,4 +1,5 @@
 // Copyright Nick Thompson, 2017
+// Copyright Matt Borland, 2024
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -15,10 +16,17 @@
 #ifndef BOOST_MATH_QUADRATURE_SINH_SINH_HPP
 #define BOOST_MATH_QUADRATURE_SINH_SINH_HPP

+#include
+#include
+#include
+#include
+#include
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include
 #include
 #include
-#include

 namespace boost{ namespace math{ namespace quadrature {

@@ -40,4 +48,25 @@ class sinh_sinh
 };

 }}}

-#endif
+
+#endif // BOOST_MATH_HAS_NVRTC
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+
+namespace boost {
+namespace math {
+namespace quadrature {
+
+template <class F, class Real, class Policy = policies::policy<> >
+__device__ auto sinh_sinh_integrate(const F& f, Real tol = boost::math::tools::root_epsilon<Real>(), Real* error = nullptr, Real* L1 = nullptr, boost::math::size_t* levels = nullptr)
+{
+    return detail::sinh_sinh_integrate_impl(f, tol, error, L1, levels);
+}
+
+} // namespace quadrature
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_ENABLE_CUDA
+
+#endif // BOOST_MATH_QUADRATURE_SINH_SINH_HPP
diff --git a/include/boost/math/special_functions/airy.hpp b/include/boost/math/special_functions/airy.hpp
index 06eee92383..65114089a6 100644
--- a/include/boost/math/special_functions/airy.hpp
+++ b/include/boost/math/special_functions/airy.hpp
@@ -1,4 +1,5 @@
 // Copyright John Maddock 2012.
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt @@ -7,19 +8,24 @@ #ifndef BOOST_MATH_AIRY_HPP #define BOOST_MATH_AIRY_HPP -#include +#include +#include +#include +#include #include #include #include #include #include +#include +#include namespace boost{ namespace math{ namespace detail{ template -T airy_ai_imp(T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T airy_ai_imp(T x, const Policy& pol) { BOOST_MATH_STD_USING @@ -57,7 +63,7 @@ T airy_ai_imp(T x, const Policy& pol) } template -T airy_bi_imp(T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T airy_bi_imp(T x, const Policy& pol) { BOOST_MATH_STD_USING @@ -90,7 +96,7 @@ T airy_bi_imp(T x, const Policy& pol) } template -T airy_ai_prime_imp(T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T airy_ai_prime_imp(T x, const Policy& pol) { BOOST_MATH_STD_USING @@ -125,7 +131,7 @@ T airy_ai_prime_imp(T x, const Policy& pol) } template -T airy_bi_prime_imp(T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T airy_bi_prime_imp(T x, const Policy& pol) { BOOST_MATH_STD_USING @@ -156,7 +162,7 @@ T airy_bi_prime_imp(T x, const Policy& pol) } template -T airy_ai_zero_imp(int m, const Policy& pol) +BOOST_MATH_GPU_ENABLED T airy_ai_zero_imp(int m, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for log, sqrt. @@ -209,7 +215,7 @@ T airy_ai_zero_imp(int m, const Policy& pol) } template -T airy_bi_zero_imp(int m, const Policy& pol) +BOOST_MATH_GPU_ENABLED T airy_bi_zero_imp(int m, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for log, sqrt. @@ -263,7 +269,7 @@ T airy_bi_zero_imp(int m, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type airy_ai(T x, const Policy&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_ai(T x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; @@ -279,13 +285,13 @@ inline typename tools::promote_args::type airy_ai(T x, const Policy&) } template -inline typename tools::promote_args::type airy_ai(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_ai(T x) { return airy_ai(x, policies::policy<>()); } template -inline typename tools::promote_args::type airy_bi(T x, const Policy&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_bi(T x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; @@ -301,13 +307,13 @@ inline typename tools::promote_args::type airy_bi(T x, const Policy&) } template -inline typename tools::promote_args::type airy_bi(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_bi(T x) { return airy_bi(x, policies::policy<>()); } template -inline typename tools::promote_args::type airy_ai_prime(T x, const Policy&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_ai_prime(T x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; @@ -323,13 +329,13 @@ inline typename tools::promote_args::type airy_ai_prime(T x, const Policy&) } template -inline typename tools::promote_args::type airy_ai_prime(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_ai_prime(T x) { return airy_ai_prime(x, policies::policy<>()); } template -inline typename tools::promote_args::type airy_bi_prime(T x, const Policy&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_bi_prime(T x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; @@ 
-345,13 +351,13 @@ inline typename tools::promote_args::type airy_bi_prime(T x, const Policy&) } template -inline typename tools::promote_args::type airy_bi_prime(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type airy_bi_prime(T x) { return airy_bi_prime(x, policies::policy<>()); } template -inline T airy_ai_zero(int m, const Policy& /*pol*/) +BOOST_MATH_GPU_ENABLED inline T airy_ai_zero(int m, const Policy& /*pol*/) { BOOST_FPU_EXCEPTION_GUARD typedef typename policies::evaluation::type value_type; @@ -371,13 +377,13 @@ inline T airy_ai_zero(int m, const Policy& /*pol*/) } template -inline T airy_ai_zero(int m) +BOOST_MATH_GPU_ENABLED inline T airy_ai_zero(int m) { return airy_ai_zero(m, policies::policy<>()); } template -inline OutputIterator airy_ai_zero( +BOOST_MATH_GPU_ENABLED inline OutputIterator airy_ai_zero( int start_index, unsigned number_of_zeros, OutputIterator out_it, @@ -399,7 +405,7 @@ inline OutputIterator airy_ai_zero( } template -inline OutputIterator airy_ai_zero( +BOOST_MATH_GPU_ENABLED inline OutputIterator airy_ai_zero( int start_index, unsigned number_of_zeros, OutputIterator out_it) @@ -408,7 +414,7 @@ inline OutputIterator airy_ai_zero( } template -inline T airy_bi_zero(int m, const Policy& /*pol*/) +BOOST_MATH_GPU_ENABLED inline T airy_bi_zero(int m, const Policy& /*pol*/) { BOOST_FPU_EXCEPTION_GUARD typedef typename policies::evaluation::type value_type; @@ -428,13 +434,13 @@ inline T airy_bi_zero(int m, const Policy& /*pol*/) } template -inline T airy_bi_zero(int m) +BOOST_MATH_GPU_ENABLED inline T airy_bi_zero(int m) { return airy_bi_zero(m, policies::policy<>()); } template -inline OutputIterator airy_bi_zero( +BOOST_MATH_GPU_ENABLED inline OutputIterator airy_bi_zero( int start_index, unsigned number_of_zeros, OutputIterator out_it, @@ -456,7 +462,7 @@ inline OutputIterator airy_bi_zero( } template -inline OutputIterator airy_bi_zero( +BOOST_MATH_GPU_ENABLED inline OutputIterator airy_bi_zero( int start_index, unsigned number_of_zeros, OutputIterator out_it) diff --git a/include/boost/math/special_functions/atanh.hpp b/include/boost/math/special_functions/atanh.hpp index 543fb5fce3..9d73e568c0 100644 --- a/include/boost/math/special_functions/atanh.hpp +++ b/include/boost/math/special_functions/atanh.hpp @@ -15,7 +15,7 @@ #pragma once #endif -#include +#include #include #include #include @@ -33,10 +33,10 @@ namespace boost // This is the main fare template - inline T atanh_imp(const T x, const Policy& pol) + BOOST_MATH_GPU_ENABLED inline T atanh_imp(const T x, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::atanh<%1%>(%1%)"; + constexpr auto function = "boost::math::atanh<%1%>(%1%)"; if(x < -1) { @@ -87,7 +87,7 @@ namespace boost } template - inline typename tools::promote_args::type atanh(T x, const Policy&) + BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type atanh(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -102,7 +102,7 @@ namespace boost "boost::math::atanh<%1%>(%1%)"); } template - inline typename tools::promote_args::type atanh(T x) + BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type atanh(T x) { return boost::math::atanh(x, policies::policy<>()); } diff --git a/include/boost/math/special_functions/bessel.hpp b/include/boost/math/special_functions/bessel.hpp index e9677d3c79..c32f251bcd 100644 --- a/include/boost/math/special_functions/bessel.hpp +++ 
b/include/boost/math/special_functions/bessel.hpp
@@ -15,8 +15,14 @@
 # pragma once
 #endif

-#include
-#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -31,10 +37,8 @@
 #include
 #include
 #include
-#include
-#include
-#include
-#include
+#include
+#include

 #ifdef _MSC_VER
 # pragma warning(push)
@@ -50,7 +54,7 @@ struct sph_bessel_j_small_z_series_term
 {
    typedef T result_type;

-   sph_bessel_j_small_z_series_term(unsigned v_, T x)
+   BOOST_MATH_GPU_ENABLED sph_bessel_j_small_z_series_term(unsigned v_, T x)
       : N(0), v(v_)
    {
       BOOST_MATH_STD_USING
@@ -64,7 +68,7 @@ struct sph_bessel_j_small_z_series_term
       term = pow(mult, T(v)) / boost::math::tgamma(v+1+T(0.5f), Policy());
       mult *= -mult;
    }
-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
    {
       T r = term;
       ++N;
@@ -79,11 +83,11 @@
 };

 template
-inline T sph_bessel_j_small_z_series(unsigned v, T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T sph_bessel_j_small_z_series(unsigned v, T x, const Policy& pol)
 {
    BOOST_MATH_STD_USING // ADL of std names
    sph_bessel_j_small_z_series_term s(v, x);
-   std::uintmax_t max_iter = policies::get_max_series_iterations();
+   boost::math::uintmax_t max_iter = policies::get_max_series_iterations();

    T result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter);

@@ -92,10 +96,21 @@ inline T sph_bessel_j_small_z_series(unsigned v, T x, const Policy& pol)
 }

 template
-T cyl_bessel_j_imp(T v, T x, const bessel_no_int_tag& t, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T cyl_bessel_j_imp_final(T v, T x, const bessel_no_int_tag& t, const Policy& pol)
 {
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::bessel_j<%1%>(%1%,%1%)";
+
+   T result_J, y; // LCOV_EXCL_LINE
+   bessel_jy(v, x, &result_J, &y, need_j, pol);
+   return result_J;
+}
+
+// Dispatch function to avoid recursion
+template
+BOOST_MATH_GPU_ENABLED T cyl_bessel_j_imp(T v, T x, const bessel_no_int_tag& t, const Policy& pol)
+{
+   BOOST_MATH_STD_USING
+
    if(x < 0)
    {
       // better have integer v:
@@ -105,23 +120,27 @@ T cyl_bessel_j_imp(T v, T x, const bessel_no_int_tag& t, const Policy& pol)
          // This branch is hit by multiprecision types only, and is
          // tested by our real_concept tests, but these are excluded from coverage
          // due to time constraints.
-         T r = cyl_bessel_j_imp(v, T(-x), t, pol);
+         T r = cyl_bessel_j_imp_final(T(v), T(-x), t, pol);
          if (iround(v, pol) & 1)
+         {
            r = -r;
+         }
+
         return r;
         // LCOV_EXCL_STOP
      }
      else
+      {
+         constexpr auto function = "boost::math::bessel_j<%1%>(%1%,%1%)";
         return policies::raise_domain_error(function, "Got x = %1%, but we need x >= 0", x, pol);
+      }
   }

-   T result_J, y; // LCOV_EXCL_LINE
-   bessel_jy(v, x, &result_J, &y, need_j, pol);
-   return result_J;
+   return cyl_bessel_j_imp_final(T(v), T(x), t, pol);
 }

 template
-inline T cyl_bessel_j_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T cyl_bessel_j_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol)
 {
    BOOST_MATH_STD_USING // ADL of std names.
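// Why the patch splits cyl_bessel_j_imp in two: device compilers (nvcc,
// SYCL) reject or heavily penalise recursion in device code, and the old
// negative-x branch re-entered cyl_bessel_j_imp itself. Moving the real
// work into a *_imp_final core that never dispatches keeps the call graph
// acyclic. A minimal self-contained sketch of the same pattern; eval and
// eval_core are illustrative names, not Boost.Math API, and the core body
// is a stand-in for the real bessel_jy kernel:

#include <cmath>

template <class T>
T eval_core(T x)                     // does the real work; never calls eval()
{
   return x * std::exp(-x);          // stand-in for bessel_jy(...)
}

template <class T>
T eval(T x)                          // dispatch layer: argument reduction only
{
   return x < 0 ? -eval_core(T(-x))  // reflection handled here, no recursion
                : eval_core(x);
}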
int ival = detail::iconv(v, pol); @@ -135,14 +154,14 @@ inline T cyl_bessel_j_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& p } template -inline T cyl_bessel_j_imp(int v, T x, const bessel_int_tag&, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_bessel_j_imp(int v, T x, const bessel_int_tag&, const Policy& pol) { BOOST_MATH_STD_USING return bessel_jn(v, x, pol); } template -inline T sph_bessel_j_imp(unsigned n, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T sph_bessel_j_imp(unsigned n, T x, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names if(x < 0) @@ -171,7 +190,7 @@ inline T sph_bessel_j_imp(unsigned n, T x, const Policy& pol) } template -T cyl_bessel_i_imp(T v, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T cyl_bessel_i_imp_final(T v, T x, const Policy& pol) { // // This handles all the bessel I functions, note that we don't optimise @@ -180,20 +199,7 @@ T cyl_bessel_i_imp(T v, T x, const Policy& pol) // case has better error handling too). // BOOST_MATH_STD_USING - static const char* function = "boost::math::cyl_bessel_i<%1%>(%1%,%1%)"; - if(x < 0) - { - // better have integer v: - if(floor(v) == v) - { - T r = cyl_bessel_i_imp(v, T(-x), pol); - if(iround(v, pol) & 1) - r = -r; - return r; - } - else - return policies::raise_domain_error(function, "Got x = %1%, but we need x >= 0", x, pol); - } + constexpr auto function = "boost::math::cyl_bessel_i<%1%>(%1%,%1%)"; if(x == 0) { if(v < 0) @@ -210,7 +216,7 @@ T cyl_bessel_i_imp(T v, T x, const Policy& pol) } return sqrt(2 / (x * constants::pi())) * sinh(x); } - if((policies::digits() <= 113) && (std::numeric_limits::digits <= 113) && (std::numeric_limits::radix == 2)) + if((policies::digits() <= 113) && (boost::math::numeric_limits::digits <= 113) && (boost::math::numeric_limits::radix == 2)) { if(v == 0) { @@ -228,10 +234,39 @@ T cyl_bessel_i_imp(T v, T x, const Policy& pol) return result_I; } +// Additional dispatch function to get the GPU impls happy +template +BOOST_MATH_GPU_ENABLED T cyl_bessel_i_imp(T v, T x, const Policy& pol) +{ + BOOST_MATH_STD_USING + constexpr auto function = "boost::math::cyl_bessel_i<%1%>(%1%,%1%)"; + + if(x < 0) + { + // better have integer v: + if(floor(v) == v) + { + T r = cyl_bessel_i_imp_final(T(v), T(-x), pol); + if(iround(v, pol) & 1) + { + r = -r; + } + + return r; + } + else + { + return policies::raise_domain_error(function, "Got x = %1%, but we need x >= 0", x, pol); + } + } + + return cyl_bessel_i_imp_final(T(v), T(x), pol); +} + template -inline T cyl_bessel_k_imp(T v, T x, const bessel_no_int_tag& /* t */, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_bessel_k_imp(T v, T x, const bessel_no_int_tag& /* t */, const Policy& pol) { - static const char* function = "boost::math::cyl_bessel_k<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::cyl_bessel_k<%1%>(%1%,%1%)"; BOOST_MATH_STD_USING if(x < 0) { @@ -248,7 +283,7 @@ inline T cyl_bessel_k_imp(T v, T x, const bessel_no_int_tag& /* t */, const Poli } template -inline T cyl_bessel_k_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_bessel_k_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol) { BOOST_MATH_STD_USING if((floor(v) == v)) @@ -259,15 +294,15 @@ inline T cyl_bessel_k_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& p } template -inline T cyl_bessel_k_imp(int v, T x, const bessel_int_tag&, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_bessel_k_imp(int v, T x, const bessel_int_tag&, const Policy& pol) { 
return bessel_kn(v, x, pol); } template -inline T cyl_neumann_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_neumann_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol) { - static const char* function = "boost::math::cyl_neumann<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::cyl_neumann<%1%>(%1%,%1%)"; BOOST_MATH_INSTRUMENT_VARIABLE(v); BOOST_MATH_INSTRUMENT_VARIABLE(x); @@ -291,7 +326,7 @@ inline T cyl_neumann_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol) } template -inline T cyl_neumann_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_neumann_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol) { BOOST_MATH_STD_USING @@ -310,16 +345,16 @@ inline T cyl_neumann_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& po } template -inline T cyl_neumann_imp(int v, T x, const bessel_int_tag&, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_neumann_imp(int v, T x, const bessel_int_tag&, const Policy& pol) { return bessel_yn(v, x, pol); } template -inline T sph_neumann_imp(unsigned v, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T sph_neumann_imp(unsigned v, T x, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names - static const char* function = "boost::math::sph_neumann<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::sph_neumann<%1%>(%1%,%1%)"; // // Nothing much to do here but check for errors, and // evaluate the function's definition directly: @@ -340,11 +375,11 @@ inline T sph_neumann_imp(unsigned v, T x, const Policy& pol) } template -inline T cyl_bessel_j_zero_imp(T v, int m, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_bessel_j_zero_imp(T v, int m, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for floor. - static const char* function = "boost::math::cyl_bessel_j_zero<%1%>(%1%, int)"; + constexpr auto function = "boost::math::cyl_bessel_j_zero<%1%>(%1%, int)"; const T half_epsilon(boost::math::tools::epsilon() / 2U); @@ -395,7 +430,7 @@ inline T cyl_bessel_j_zero_imp(T v, int m, const Policy& pol) const T guess_root = boost::math::detail::bessel_zero::cyl_bessel_j_zero_detail::initial_guess((order_is_integer ? vv : v), m, pol); // Select the maximum allowed iterations from the policy. - std::uintmax_t number_of_iterations = policies::get_max_root_iterations(); + boost::math::uintmax_t number_of_iterations = policies::get_max_root_iterations(); const T delta_lo = ((guess_root > 0.2F) ? T(0.2) : T(guess_root / 2U)); @@ -418,11 +453,11 @@ inline T cyl_bessel_j_zero_imp(T v, int m, const Policy& pol) } template -inline T cyl_neumann_zero_imp(T v, int m, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T cyl_neumann_zero_imp(T v, int m, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for floor. - static const char* function = "boost::math::cyl_neumann_zero<%1%>(%1%, int)"; + constexpr auto function = "boost::math::cyl_neumann_zero<%1%>(%1%, int)"; // Handle non-finite order. if (!(boost::math::isfinite)(v) ) @@ -473,7 +508,7 @@ inline T cyl_neumann_zero_imp(T v, int m, const Policy& pol) const T guess_root = boost::math::detail::bessel_zero::cyl_neumann_zero_detail::initial_guess(v, m, pol); // Select the maximum allowed iterations from the policy. - std::uintmax_t number_of_iterations = policies::get_max_root_iterations(); + boost::math::uintmax_t number_of_iterations = policies::get_max_root_iterations(); const T delta_lo = ((guess_root > 0.2F) ? 
T(0.2) : T(guess_root / 2U)); @@ -498,7 +533,7 @@ inline T cyl_neumann_zero_imp(T v, int m, const Policy& pol) } // namespace detail template -inline typename detail::bessel_traits::result_type cyl_bessel_j(T1 v, T2 x, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type cyl_bessel_j(T1 v, T2 x, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -514,13 +549,13 @@ inline typename detail::bessel_traits::result_type cyl_bessel_j( } template -inline typename detail::bessel_traits >::result_type cyl_bessel_j(T1 v, T2 x) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type cyl_bessel_j(T1 v, T2 x) { return cyl_bessel_j(v, x, policies::policy<>()); } template -inline typename detail::bessel_traits::result_type sph_bessel(unsigned v, T x, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type sph_bessel(unsigned v, T x, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -535,13 +570,13 @@ inline typename detail::bessel_traits::result_type sph_bessel(unsi } template -inline typename detail::bessel_traits >::result_type sph_bessel(unsigned v, T x) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type sph_bessel(unsigned v, T x) { return sph_bessel(v, x, policies::policy<>()); } template -inline typename detail::bessel_traits::result_type cyl_bessel_i(T1 v, T2 x, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type cyl_bessel_i(T1 v, T2 x, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -556,13 +591,13 @@ inline typename detail::bessel_traits::result_type cyl_bessel_i( } template -inline typename detail::bessel_traits >::result_type cyl_bessel_i(T1 v, T2 x) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type cyl_bessel_i(T1 v, T2 x) { return cyl_bessel_i(v, x, policies::policy<>()); } template -inline typename detail::bessel_traits::result_type cyl_bessel_k(T1 v, T2 x, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type cyl_bessel_k(T1 v, T2 x, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -578,13 +613,13 @@ inline typename detail::bessel_traits::result_type cyl_bessel_k( } template -inline typename detail::bessel_traits >::result_type cyl_bessel_k(T1 v, T2 x) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type cyl_bessel_k(T1 v, T2 x) { return cyl_bessel_k(v, x, policies::policy<>()); } template -inline typename detail::bessel_traits::result_type cyl_neumann(T1 v, T2 x, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type cyl_neumann(T1 v, T2 x, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -600,13 +635,13 @@ inline typename detail::bessel_traits::result_type cyl_neumann(T } template -inline typename detail::bessel_traits >::result_type cyl_neumann(T1 v, T2 x) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type cyl_neumann(T1 v, T2 x) { return cyl_neumann(v, x, policies::policy<>()); } template -inline typename detail::bessel_traits::result_type sph_neumann(unsigned v, T x, const Policy& /* 
pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type sph_neumann(unsigned v, T x, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -621,13 +656,13 @@ inline typename detail::bessel_traits::result_type sph_neumann(uns } template -inline typename detail::bessel_traits >::result_type sph_neumann(unsigned v, T x) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type sph_neumann(unsigned v, T x) { return sph_neumann(v, x, policies::policy<>()); } template -inline typename detail::bessel_traits::result_type cyl_bessel_j_zero(T v, int m, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type cyl_bessel_j_zero(T v, int m, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename detail::bessel_traits::result_type result_type; @@ -639,35 +674,35 @@ inline typename detail::bessel_traits::result_type cyl_bessel_j_ze policies::discrete_quantile<>, policies::assert_undefined<> >::type forwarding_policy; - static_assert( false == std::numeric_limits::is_specialized - || ( true == std::numeric_limits::is_specialized - && false == std::numeric_limits::is_integer), + static_assert( false == boost::math::numeric_limits::is_specialized + || ( true == boost::math::numeric_limits::is_specialized + && false == boost::math::numeric_limits::is_integer), "Order must be a floating-point type."); return policies::checked_narrowing_cast(detail::cyl_bessel_j_zero_imp(v, m, forwarding_policy()), "boost::math::cyl_bessel_j_zero<%1%>(%1%,%1%)"); } template -inline typename detail::bessel_traits >::result_type cyl_bessel_j_zero(T v, int m) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type cyl_bessel_j_zero(T v, int m) { - static_assert( false == std::numeric_limits::is_specialized - || ( true == std::numeric_limits::is_specialized - && false == std::numeric_limits::is_integer), + static_assert( false == boost::math::numeric_limits::is_specialized + || ( true == boost::math::numeric_limits::is_specialized + && false == boost::math::numeric_limits::is_integer), "Order must be a floating-point type."); return cyl_bessel_j_zero >(v, m, policies::policy<>()); } template -inline OutputIterator cyl_bessel_j_zero(T v, +BOOST_MATH_GPU_ENABLED inline OutputIterator cyl_bessel_j_zero(T v, int start_index, unsigned number_of_zeros, OutputIterator out_it, const Policy& pol) { - static_assert( false == std::numeric_limits::is_specialized - || ( true == std::numeric_limits::is_specialized - && false == std::numeric_limits::is_integer), + static_assert( false == boost::math::numeric_limits::is_specialized + || ( true == boost::math::numeric_limits::is_specialized + && false == boost::math::numeric_limits::is_integer), "Order must be a floating-point type."); for(int i = 0; i < static_cast(number_of_zeros); ++i) @@ -679,7 +714,7 @@ inline OutputIterator cyl_bessel_j_zero(T v, } template -inline OutputIterator cyl_bessel_j_zero(T v, +BOOST_MATH_GPU_ENABLED inline OutputIterator cyl_bessel_j_zero(T v, int start_index, unsigned number_of_zeros, OutputIterator out_it) @@ -688,7 +723,7 @@ inline OutputIterator cyl_bessel_j_zero(T v, } template -inline typename detail::bessel_traits::result_type cyl_neumann_zero(T v, int m, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits::result_type cyl_neumann_zero(T v, int m, const Policy& /* pol */) { BOOST_FPU_EXCEPTION_GUARD typedef typename 
detail::bessel_traits::result_type result_type; @@ -700,35 +735,35 @@ inline typename detail::bessel_traits::result_type cyl_neumann_zer policies::discrete_quantile<>, policies::assert_undefined<> >::type forwarding_policy; - static_assert( false == std::numeric_limits::is_specialized - || ( true == std::numeric_limits::is_specialized - && false == std::numeric_limits::is_integer), + static_assert( false == boost::math::numeric_limits::is_specialized + || ( true == boost::math::numeric_limits::is_specialized + && false == boost::math::numeric_limits::is_integer), "Order must be a floating-point type."); return policies::checked_narrowing_cast(detail::cyl_neumann_zero_imp(v, m, forwarding_policy()), "boost::math::cyl_neumann_zero<%1%>(%1%,%1%)"); } template -inline typename detail::bessel_traits >::result_type cyl_neumann_zero(T v, int m) +BOOST_MATH_GPU_ENABLED inline typename detail::bessel_traits >::result_type cyl_neumann_zero(T v, int m) { - static_assert( false == std::numeric_limits::is_specialized - || ( true == std::numeric_limits::is_specialized - && false == std::numeric_limits::is_integer), + static_assert( false == boost::math::numeric_limits::is_specialized + || ( true == boost::math::numeric_limits::is_specialized + && false == boost::math::numeric_limits::is_integer), "Order must be a floating-point type."); return cyl_neumann_zero >(v, m, policies::policy<>()); } template -inline OutputIterator cyl_neumann_zero(T v, +BOOST_MATH_GPU_ENABLED inline OutputIterator cyl_neumann_zero(T v, int start_index, unsigned number_of_zeros, OutputIterator out_it, const Policy& pol) { - static_assert( false == std::numeric_limits::is_specialized - || ( true == std::numeric_limits::is_specialized - && false == std::numeric_limits::is_integer), + static_assert( false == boost::math::numeric_limits::is_specialized + || ( true == boost::math::numeric_limits::is_specialized + && false == boost::math::numeric_limits::is_integer), "Order must be a floating-point type."); for(int i = 0; i < static_cast(number_of_zeros); ++i) @@ -740,7 +775,7 @@ inline OutputIterator cyl_neumann_zero(T v, } template -inline OutputIterator cyl_neumann_zero(T v, +BOOST_MATH_GPU_ENABLED inline OutputIterator cyl_neumann_zero(T v, int start_index, unsigned number_of_zeros, OutputIterator out_it) diff --git a/include/boost/math/special_functions/beta.hpp b/include/boost/math/special_functions/beta.hpp index c36e1f0d0c..27901a1131 100644 --- a/include/boost/math/special_functions/beta.hpp +++ b/include/boost/math/special_functions/beta.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,18 +11,27 @@ #pragma once #endif -#include #include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include namespace boost{ namespace math{ @@ -31,7 +41,7 @@ namespace detail{ // Implementation of Beta(a,b) using the Lanczos approximation: // template -T beta_imp(T a, T b, const Lanczos&, const Policy& pol) +BOOST_MATH_GPU_ENABLED T beta_imp(T a, T b, const Lanczos&, const Policy& pol) { BOOST_MATH_STD_USING // for ADL of std names @@ -85,7 +95,9 @@ T beta_imp(T a, T b, const Lanczos&, const Policy& pol) */ if(a < b) - std::swap(a, b); + { + BOOST_MATH_GPU_SAFE_SWAP(a, b); + } // Lanczos calculation: T agh = static_cast(a + Lanczos::g() - 0.5f); @@ -120,8 +132,9 @@ T beta_imp(T a, T b, const Lanczos&, const Policy& pol) // Generic implementation of Beta(a,b) without Lanczos approximation support // (Caution this is slow!!!): // +#ifndef BOOST_MATH_HAS_GPU_SUPPORT template -T beta_imp(T a, T b, const lanczos::undefined_lanczos& l, const Policy& pol) +BOOST_MATH_GPU_ENABLED T beta_imp(T a, T b, const lanczos::undefined_lanczos& l, const Policy& pol) { BOOST_MATH_STD_USING @@ -190,7 +203,7 @@ T beta_imp(T a, T b, const lanczos::undefined_lanczos& l, const Policy& pol) } } // template T beta_imp(T a, T b, const lanczos::undefined_lanczos& l) - +#endif // // Compute the leading power terms in the incomplete Beta: @@ -204,7 +217,7 @@ T beta_imp(T a, T b, const lanczos::undefined_lanczos& l, const Policy& pol) // horrendous cancellation errors. // template -T ibeta_power_terms(T a, +BOOST_MATH_GPU_ENABLED T ibeta_power_terms(T a, T b, T x, T y, @@ -242,11 +255,11 @@ T ibeta_power_terms(T a, // l1 and l2 are the base of the exponents minus one: T l1 = (x * b - y * agh) / agh; T l2 = (y * a - x * bgh) / bgh; - if(((std::min)(fabs(l1), fabs(l2)) < 0.2)) + if((BOOST_MATH_GPU_SAFE_MIN(fabs(l1), fabs(l2)) < 0.2)) { // when the base of the exponent is very near 1 we get really // gross errors unless extra care is taken: - if((l1 * l2 > 0) || ((std::min)(a, b) < 1)) + if((l1 * l2 > 0) || (BOOST_MATH_GPU_SAFE_MIN(a, b) < 1)) { // // This first branch handles the simple cases where either: @@ -282,7 +295,7 @@ T ibeta_power_terms(T a, BOOST_MATH_INSTRUMENT_VARIABLE(result); } } - else if((std::max)(fabs(l1), fabs(l2)) < 0.5) + else if(BOOST_MATH_GPU_SAFE_MAX(fabs(l1), fabs(l2)) < 0.5) { // // Both exponents are near one and both the exponents are @@ -444,8 +457,9 @@ T ibeta_power_terms(T a, // // This version is generic, slow, and does not use the Lanczos approximation. 
// +#ifndef BOOST_MATH_HAS_GPU_SUPPORT template -T ibeta_power_terms(T a, +BOOST_MATH_GPU_ENABLED T ibeta_power_terms(T a, T b, T x, T y, @@ -480,7 +494,7 @@ T ibeta_power_terms(T a, bool need_logs = false; if (a < b) { - BOOST_MATH_IF_CONSTEXPR(std::numeric_limits::has_infinity) + BOOST_MATH_IF_CONSTEXPR(boost::math::numeric_limits::has_infinity) { power1 = pow((x * y * c * c) / (a * b), a); power2 = pow((y * c) / b, b - a); @@ -503,7 +517,7 @@ T ibeta_power_terms(T a, } else { - BOOST_MATH_IF_CONSTEXPR(std::numeric_limits::has_infinity) + BOOST_MATH_IF_CONSTEXPR(boost::math::numeric_limits::has_infinity) { power1 = pow((x * y * c * c) / (a * b), b); power2 = pow((x * c) / a, a - b); @@ -522,7 +536,7 @@ T ibeta_power_terms(T a, need_logs = true; } } - BOOST_MATH_IF_CONSTEXPR(std::numeric_limits::has_infinity) + BOOST_MATH_IF_CONSTEXPR(boost::math::numeric_limits::has_infinity) { if (!(boost::math::isnormal)(power1) || !(boost::math::isnormal)(power2)) { @@ -554,7 +568,7 @@ T ibeta_power_terms(T a, // exp(a * log1p((xb - ya) / a + p + p(xb - ya) / a)) // // Analogously, when a > b we can just swap all the terms around. - // + // // Finally, there are a few cases (x or y is unity) when the above logic can't be used // or where there is no logarithmic cancellation and accuracy is better just using // the regular formula: @@ -621,6 +635,8 @@ T ibeta_power_terms(T a, } return prefix * power1 * (power2 / bet); } + +#endif // // Series approximation to the incomplete beta: // @@ -628,8 +644,8 @@ template struct ibeta_series_t { typedef T result_type; - ibeta_series_t(T a_, T b_, T x_, T mult) : result(mult), x(x_), apn(a_), poch(1-b_), n(1) {} - T operator()() + BOOST_MATH_GPU_ENABLED ibeta_series_t(T a_, T b_, T x_, T mult) : result(mult), x(x_), apn(a_), poch(1-b_), n(1) {} + BOOST_MATH_GPU_ENABLED T operator()() { T r = result / apn; apn += 1; @@ -644,7 +660,7 @@ struct ibeta_series_t }; template -T ibeta_series(T a, T b, T x, T s0, const Lanczos&, bool normalised, T* p_derivative, T y, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ibeta_series(T a, T b, T x, T s0, const Lanczos&, bool normalised, T* p_derivative, T y, const Policy& pol) { BOOST_MATH_STD_USING @@ -713,7 +729,7 @@ T ibeta_series(T a, T b, T x, T s0, const Lanczos&, bool normalised, T* p_deriva if(result < tools::min_value()) return s0; // Safeguard: series can't cope with denorms. ibeta_series_t s(a, b, x, result); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter, s0); policies::check_series_iterations("boost::math::ibeta<%1%>(%1%, %1%, %1%) in ibeta_series (with lanczos)", max_iter, pol); return result; @@ -721,8 +737,9 @@ T ibeta_series(T a, T b, T x, T s0, const Lanczos&, bool normalised, T* p_deriva // // Incomplete Beta series again, this time without Lanczos support: // +#ifndef BOOST_MATH_HAS_GPU_SUPPORT template -T ibeta_series(T a, T b, T x, T s0, const boost::math::lanczos::undefined_lanczos& l, bool normalised, T* p_derivative, T y, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ibeta_series(T a, T b, T x, T s0, const boost::math::lanczos::undefined_lanczos& l, bool normalised, T* p_derivative, T y, const Policy& pol) { BOOST_MATH_STD_USING @@ -774,23 +791,23 @@ T ibeta_series(T a, T b, T x, T s0, const boost::math::lanczos::undefined_lanczo if(result < tools::min_value()) return s0; // Safeguard: series can't cope with denorms. 
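// Note that the GPU annotations leave the term-functor protocol itself
// unchanged: a result_type typedef plus an operator() that returns
// successive series terms, which tools::sum_series accumulates until a
// term becomes negligible or the iteration cap from the policy is hit.
// A self-contained sketch of that protocol follows; sum_series_sketch is
// a simplified stand-in, not the real boost::math::tools::sum_series:

#include <cmath>
#include <cstdint>

struct geometric_term                // same shape as ibeta_series_t above
{
   typedef double result_type;
   explicit geometric_term(double r_) : r(r_), term(1) {}
   double operator()() { double t = term; term *= r; return t; }
private:
   double r, term;
};

template <class F>
typename F::result_type sum_series_sketch(F& f, typename F::result_type eps,
                                          std::uintmax_t& max_iter)
{
   typename F::result_type result = 0, t;
   do {
      t = f();                       // pull the next term from the functor
      result += t;
   } while (std::fabs(t) > std::fabs(result) * eps && --max_iter);
   return result;
}

// Usage: geometric_term g(0.5); std::uintmax_t n = 1000;
// sum_series_sketch(g, 1e-16, n) converges to ~2.0 in about 50 terms.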
ibeta_series_t s(a, b, x, result); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter, s0); policies::check_series_iterations("boost::math::ibeta<%1%>(%1%, %1%, %1%) in ibeta_series (without lanczos)", max_iter, pol); return result; } - +#endif // // Continued fraction for the incomplete beta: // template struct ibeta_fraction2_t { - typedef std::pair result_type; + typedef boost::math::pair result_type; - ibeta_fraction2_t(T a_, T b_, T x_, T y_) : a(a_), b(b_), x(x_), y(y_), m(0) {} + BOOST_MATH_GPU_ENABLED ibeta_fraction2_t(T a_, T b_, T x_, T y_) : a(a_), b(b_), x(x_), y(y_), m(0) {} - result_type operator()() + BOOST_MATH_GPU_ENABLED result_type operator()() { T aN = (a + m - 1) * (a + b + m - 1) * m * (b - m) * x * x; T denom = (a + 2 * m - 1); @@ -802,7 +819,7 @@ struct ibeta_fraction2_t ++m; - return std::make_pair(aN, bN); + return boost::math::make_pair(aN, bN); } private: @@ -813,7 +830,7 @@ struct ibeta_fraction2_t // Evaluate the incomplete beta via the continued fraction representation: // template -inline T ibeta_fraction2(T a, T b, T x, T y, const Policy& pol, bool normalised, T* p_derivative) +BOOST_MATH_GPU_ENABLED inline T ibeta_fraction2(T a, T b, T x, T y, const Policy& pol, bool normalised, T* p_derivative) { typedef typename lanczos::lanczos::type lanczos_type; BOOST_MATH_STD_USING @@ -836,7 +853,7 @@ inline T ibeta_fraction2(T a, T b, T x, T y, const Policy& pol, bool normalised, // Computes the difference between ibeta(a,b,x) and ibeta(a+k,b,x): // template -T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& pol, bool normalised, T* p_derivative) +BOOST_MATH_GPU_ENABLED T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& pol, bool normalised, T* p_derivative) { typedef typename lanczos::lanczos::type lanczos_type; @@ -863,6 +880,7 @@ T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& pol, bool normalised, T* return prefix; } + // // This function is only needed for the non-regular incomplete beta, // it computes the delta in: @@ -870,7 +888,7 @@ T ibeta_a_step(T a, T b, T x, T y, int k, const Policy& pol, bool normalised, T* // it is currently only called for small k. // template -inline T rising_factorial_ratio(T a, T b, int k) +BOOST_MATH_GPU_ENABLED inline T rising_factorial_ratio(T a, T b, int k) { // calculate: // (a)(a+1)(a+2)...(a+k-1) @@ -901,33 +919,43 @@ struct Pn_size { // This is likely to be enough for ~35-50 digit accuracy // but it's hard to quantify exactly: + #ifndef BOOST_MATH_HAS_NVRTC static constexpr unsigned value = ::boost::math::max_factorial::value >= 100 ? 50 : ::boost::math::max_factorial::value >= ::boost::math::max_factorial::value ? 30 : ::boost::math::max_factorial::value >= ::boost::math::max_factorial::value ? 
15 : 1; static_assert(::boost::math::max_factorial::value >= ::boost::math::max_factorial::value, "Type does not provide for 35-50 digits of accuracy."); + #else + static constexpr unsigned value = 0; // Will never be called + #endif }; template <> struct Pn_size { static constexpr unsigned value = 15; // ~8-15 digit accuracy +#ifndef BOOST_MATH_HAS_GPU_SUPPORT static_assert(::boost::math::max_factorial::value >= 30, "Type does not provide for 8-15 digits of accuracy."); +#endif }; template <> struct Pn_size { static constexpr unsigned value = 30; // 16-20 digit accuracy +#ifndef BOOST_MATH_HAS_GPU_SUPPORT static_assert(::boost::math::max_factorial::value >= 60, "Type does not provide for 16-20 digits of accuracy."); +#endif }; template <> struct Pn_size { static constexpr unsigned value = 50; // ~35-50 digit accuracy +#ifndef BOOST_MATH_HAS_GPU_SUPPORT static_assert(::boost::math::max_factorial::value >= 100, "Type does not provide for ~35-50 digits of accuracy"); +#endif }; template -T beta_small_b_large_a_series(T a, T b, T x, T y, T s0, T mult, const Policy& pol, bool normalised) +BOOST_MATH_GPU_ENABLED T beta_small_b_large_a_series(T a, T b, T x, T y, T s0, T mult, const Policy& pol, bool normalised) { typedef typename lanczos::lanczos::type lanczos_type; BOOST_MATH_STD_USING @@ -1033,7 +1061,7 @@ T beta_small_b_large_a_series(T a, T b, T x, T y, T s0, T mult, const Policy& po // complement of the binomial distribution cdf and use this finite sum. // template -T binomial_ccdf(T n, T k, T x, T y, const Policy& pol) +BOOST_MATH_GPU_ENABLED T binomial_ccdf(T n, T k, T x, T y, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names @@ -1097,10 +1125,11 @@ T binomial_ccdf(T n, T k, T x, T y, const Policy& pol) // input range and select the right implementation method for // each domain: // + template -T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_derivative) +BOOST_MATH_GPU_ENABLED T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_derivative) { - static const char* function = "boost::math::ibeta<%1%>(%1%, %1%, %1%)"; + constexpr auto function = "boost::math::ibeta<%1%>(%1%, %1%, %1%)"; typedef typename lanczos::lanczos::type lanczos_type; BOOST_MATH_STD_USING // for ADL of std math functions. 
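// Two recurring substitutions in the hunks above and below deserve a note.
// First, `static const char* function` becomes `constexpr auto function`
// because a function-local static needs static storage duration, which
// NVRTC/SYCL device functions cannot provide, whereas a constexpr pointer
// to a string literal is a pure compile-time constant. Second, std::swap,
// std::min and std::max give way to BOOST_MATH_GPU_SAFE_* macros, since
// calling the std versions from device code is not reliably supported
// across CUDA, NVRTC and SYCL targets. A sketch of hypothetical
// equivalents (the real definitions live in Boost.Math's config headers
// and may differ):

#if defined(__CUDACC__)
#  define GPU_ENABLED_SKETCH __host__ __device__
#else
#  define GPU_ENABLED_SKETCH
#endif

template <class T>
GPU_ENABLED_SKETCH void gpu_safe_swap_sketch(T& a, T& b)
{
   T t(a); a = b; b = t;             // no <algorithm>/std::swap on device
}

template <class T>
GPU_ENABLED_SKETCH const char* error_context_sketch()
{
   constexpr auto function = "boost::math::ibeta<%1%>(%1%, %1%, %1%)";
   // static const char* function = "...";  // would need static storage
   return function;
}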
@@ -1184,8 +1213,8 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } if(a == 1) { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(x, y); invert = !invert; } if(b == 1) @@ -1214,19 +1243,19 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de return p; } - if((std::min)(a, b) <= 1) + if(BOOST_MATH_GPU_SAFE_MIN(a, b) <= 1) { if(x > 0.5) { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(x, y); invert = !invert; BOOST_MATH_INSTRUMENT_VARIABLE(invert); } - if((std::max)(a, b) <= 1) + if(BOOST_MATH_GPU_SAFE_MAX(a, b) <= 1) { // Both a,b < 1: - if((a >= (std::min)(T(0.2), b)) || (pow(x, a) <= 0.9)) + if((a >= BOOST_MATH_GPU_SAFE_MIN(T(0.2), b)) || (pow(x, a) <= 0.9)) { if(!invert) { @@ -1243,8 +1272,8 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } else { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(x, y); invert = !invert; if(y >= 0.3) { @@ -1309,8 +1338,8 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } else { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(x, y); invert = !invert; if(y >= 0.3) @@ -1387,15 +1416,15 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } if(lambda < 0) { - std::swap(a, b); - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(x, y); invert = !invert; BOOST_MATH_INSTRUMENT_VARIABLE(invert); } if(b < 40) { - if((floor(a) == a) && (floor(b) == b) && (a < static_cast((std::numeric_limits::max)() - 100)) && (y != 1)) + if((floor(a) == a) && (floor(b) == b) && (a < static_cast((boost::math::numeric_limits::max)() - 100)) && (y != 1)) { // relate to the binomial distribution and use a finite sum: T k = a - 1; @@ -1502,15 +1531,15 @@ T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised, T* p_de } // template T ibeta_imp(T a, T b, T x, const Lanczos& l, bool inv, bool normalised) template -inline T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised) +BOOST_MATH_GPU_ENABLED inline T ibeta_imp(T a, T b, T x, const Policy& pol, bool inv, bool normalised) { return ibeta_imp(a, b, x, pol, inv, normalised, static_cast(nullptr)); } template -T ibeta_derivative_imp(T a, T b, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ibeta_derivative_imp(T a, T b, T x, const Policy& pol) { - static const char* function = "ibeta_derivative<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "ibeta_derivative<%1%>(%1%,%1%,%1%)"; // // start with the usual error checks: // @@ -1559,8 +1588,8 @@ T ibeta_derivative_imp(T a, T b, T x, const Policy& pol) // Some forwarding functions that disambiguate the third argument type: // template -inline typename tools::promote_args::type - beta(RT1 a, RT2 b, const Policy&, const std::true_type*) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type + beta(RT1 a, RT2 b, const Policy&, const boost::math::true_type*) { BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; @@ -1576,8 +1605,8 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::beta_imp(static_cast(a), static_cast(b), evaluation_type(), forwarding_policy()), "boost::math::beta<%1%>(%1%,%1%)"); } template -inline typename tools::promote_args::type - beta(RT1 a, RT2 b, RT3 x, const 
std::false_type*) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type + beta(RT1 a, RT2 b, RT3 x, const boost::math::false_type*) { return boost::math::beta(a, b, x, policies::policy<>()); } @@ -1589,7 +1618,7 @@ inline typename tools::promote_args::type // and forward to the implementation functions: // template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type beta(RT1 a, RT2 b, A arg) { using tag = typename policies::is_policy::type; @@ -1598,14 +1627,14 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type beta(RT1 a, RT2 b) { return boost::math::beta(a, b, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type beta(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1622,7 +1651,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type betac(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1638,14 +1667,14 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::ibeta_imp(static_cast(a), static_cast(b), static_cast(x), forwarding_policy(), true, false), "boost::math::betac<%1%>(%1%,%1%,%1%)"); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type betac(RT1 a, RT2 b, RT3 x) { return boost::math::betac(a, b, x, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1661,14 +1690,14 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::ibeta_imp(static_cast(a), static_cast(b), static_cast(x), forwarding_policy(), false, true), "boost::math::ibeta<%1%>(%1%,%1%,%1%)"); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta(RT1 a, RT2 b, RT3 x) { return boost::math::ibeta(a, b, x, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibetac(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1684,14 +1713,14 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::ibeta_imp(static_cast(a), static_cast(b), static_cast(x), forwarding_policy(), true, true), "boost::math::ibetac<%1%>(%1%,%1%,%1%)"); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibetac(RT1 a, RT2 b, RT3 x) { return boost::math::ibetac(a, b, x, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_derivative(RT1 a, RT2 b, RT3 x, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1707,7 +1736,7 @@ inline typename tools::promote_args::type return policies::checked_narrowing_cast(detail::ibeta_derivative_imp(static_cast(a), static_cast(b), static_cast(x), forwarding_policy()), "boost::math::ibeta_derivative<%1%>(%1%,%1%,%1%)"); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type 
ibeta_derivative(RT1 a, RT2 b, RT3 x) { return boost::math::ibeta_derivative(a, b, x, policies::policy<>()); diff --git a/include/boost/math/special_functions/binomial.hpp b/include/boost/math/special_functions/binomial.hpp index e776a90bb8..3c49ff30d5 100644 --- a/include/boost/math/special_functions/binomial.hpp +++ b/include/boost/math/special_functions/binomial.hpp @@ -10,20 +10,21 @@ #pragma once #endif +#include +#include #include #include #include #include -#include namespace boost{ namespace math{ template -T binomial_coefficient(unsigned n, unsigned k, const Policy& pol) +BOOST_MATH_GPU_ENABLED T binomial_coefficient(unsigned n, unsigned k, const Policy& pol) { - static_assert(!std::is_integral::value, "Type T must not be an integral type"); + static_assert(!boost::math::is_integral::value, "Type T must not be an integral type"); BOOST_MATH_STD_USING - static const char* function = "boost::math::binomial_coefficient<%1%>(unsigned, unsigned)"; + constexpr auto function = "boost::math::binomial_coefficient<%1%>(unsigned, unsigned)"; if(k > n) return policies::raise_domain_error(function, "The binomial coefficient is undefined for k > n, but got k = %1%.", static_cast(k), pol); T result; // LCOV_EXCL_LINE @@ -43,9 +44,9 @@ T binomial_coefficient(unsigned n, unsigned k, const Policy& pol) { // Use the beta function: if(k < n - k) - result = static_cast(k * beta(static_cast(k), static_cast(n-k+1), pol)); + result = static_cast(k * boost::math::beta(static_cast(k), static_cast(n-k+1), pol)); else - result = static_cast((n - k) * beta(static_cast(k+1), static_cast(n-k), pol)); + result = static_cast((n - k) * boost::math::beta(static_cast(k+1), static_cast(n-k), pol)); if(result == 0) return policies::raise_overflow_error(function, nullptr, pol); result = 1 / result; @@ -59,7 +60,7 @@ T binomial_coefficient(unsigned n, unsigned k, const Policy& pol) // we'll promote to double: // template <> -inline float binomial_coefficient >(unsigned n, unsigned k, const policies::policy<>&) +BOOST_MATH_GPU_ENABLED inline float binomial_coefficient >(unsigned n, unsigned k, const policies::policy<>&) { typedef policies::normalise< policies::policy<>, @@ -71,7 +72,7 @@ inline float binomial_coefficient >(unsigned n, unsign } template -inline T binomial_coefficient(unsigned n, unsigned k) +BOOST_MATH_GPU_ENABLED inline T binomial_coefficient(unsigned n, unsigned k) { return binomial_coefficient(n, k, policies::policy<>()); } diff --git a/include/boost/math/special_functions/cbrt.hpp b/include/boost/math/special_functions/cbrt.hpp index 77cd5f0aec..7fdf78d014 100644 --- a/include/boost/math/special_functions/cbrt.hpp +++ b/include/boost/math/special_functions/cbrt.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,12 +11,16 @@ #pragma once #endif +#include + +#ifndef BOOST_MATH_HAS_NVRTC + #include +#include +#include #include #include #include -#include -#include namespace boost{ namespace math{ @@ -38,7 +43,7 @@ struct largest_cbrt_int_type }; template -T cbrt_imp(T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED T cbrt_imp(T z, const Policy& pol) { BOOST_MATH_STD_USING // @@ -51,7 +56,7 @@ T cbrt_imp(T z, const Policy& pol) // Expected Error Term: -1.231e-006 // Maximum Relative Change in Control Points: 5.982e-004 // - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { static_cast(0.37568269008611818), static_cast(1.3304968705558024), static_cast(-1.4897101632445036), @@ -59,7 +64,7 @@ T cbrt_imp(T z, const Policy& pol) static_cast(-0.6398703759826468), static_cast(0.13584489959258635), }; - static const T correction[] = { + BOOST_MATH_STATIC const T correction[] = { static_cast(0.62996052494743658238360530363911), // 2^-2/3 static_cast(0.79370052598409973737585281963615), // 2^-1/3 static_cast(1), @@ -154,7 +159,7 @@ T cbrt_imp(T z, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type cbrt(T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type cbrt(T z, const Policy& pol) { using result_type = typename tools::promote_args::type; using value_type = typename policies::evaluation::type; @@ -162,7 +167,7 @@ inline typename tools::promote_args::type cbrt(T z, const Policy& pol) } template -inline typename tools::promote_args::type cbrt(T z) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type cbrt(T z) { return cbrt(z, policies::policy<>()); } @@ -170,6 +175,39 @@ inline typename tools::promote_args::type cbrt(T z) } // namespace math } // namespace boost +#else // Special NVRTC handling + +namespace boost { +namespace math { + +template +BOOST_MATH_GPU_ENABLED double cbrt(T x) +{ + return ::cbrt(x); +} + +BOOST_MATH_GPU_ENABLED inline float cbrt(float x) +{ + return ::cbrtf(x); +} + +template +BOOST_MATH_GPU_ENABLED double cbrt(T x, const Policy&) +{ + return ::cbrt(x); +} + +template +BOOST_MATH_GPU_ENABLED float cbrt(float x, const Policy&) +{ + return ::cbrtf(x); +} + +} // namespace math +} // namespace boost + +#endif // NVRTC + #endif // BOOST_MATH_SF_CBRT_HPP diff --git a/include/boost/math/special_functions/cos_pi.hpp b/include/boost/math/special_functions/cos_pi.hpp index e09700ec5e..7c33614de7 100644 --- a/include/boost/math/special_functions/cos_pi.hpp +++ b/include/boost/math/special_functions/cos_pi.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2007 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,10 +11,14 @@ #pragma once #endif +#include + +#ifndef BOOST_MATH_HAS_NVRTC + #include #include +#include #include -#include #include #include #include @@ -21,7 +26,7 @@ namespace boost{ namespace math{ namespace detail{ template -T cos_pi_imp(T x, const Policy&) +BOOST_MATH_GPU_ENABLED T cos_pi_imp(T x, const Policy&) { BOOST_MATH_STD_USING // ADL of std names // cos of pi*x: @@ -34,7 +39,7 @@ T cos_pi_imp(T x, const Policy&) x = -x; } T rem = floor(x); - if(abs(floor(rem/2)*2 - rem) > std::numeric_limits::epsilon()) + if(abs(floor(rem/2)*2 - rem) > boost::math::numeric_limits::epsilon()) { invert = !invert; } @@ -60,7 +65,7 @@ T cos_pi_imp(T x, const Policy&) } // namespace detail template -inline typename tools::promote_args::type cos_pi(T x, const Policy&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type cos_pi(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -77,12 +82,47 @@ inline typename tools::promote_args::type cos_pi(T x, const Policy&) } template -inline typename tools::promote_args::type cos_pi(T x) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type cos_pi(T x) { return boost::math::cos_pi(x, policies::policy<>()); } } // namespace math } // namespace boost + +#else // Special handling for NVRTC + +namespace boost { +namespace math { + +template +BOOST_MATH_GPU_ENABLED auto cos_pi(T x) +{ + return ::cospi(x); +} + +template <> +BOOST_MATH_GPU_ENABLED auto cos_pi(float x) +{ + return ::cospif(x); +} + +template +BOOST_MATH_GPU_ENABLED auto cos_pi(T x, const Policy&) +{ + return ::cospi(x); +} + +template +BOOST_MATH_GPU_ENABLED auto cos_pi(float x, const Policy&) +{ + return ::cospif(x); +} + +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_HAS_NVRTC + #endif diff --git a/include/boost/math/special_functions/detail/airy_ai_bi_zero.hpp b/include/boost/math/special_functions/detail/airy_ai_bi_zero.hpp index 7735eb8589..e518422f17 100644 --- a/include/boost/math/special_functions/detail/airy_ai_bi_zero.hpp +++ b/include/boost/math/special_functions/detail/airy_ai_bi_zero.hpp @@ -13,6 +13,8 @@ #ifndef BOOST_MATH_AIRY_AI_BI_ZERO_2013_01_20_HPP_ #define BOOST_MATH_AIRY_AI_BI_ZERO_2013_01_20_HPP_ + #include + #include #include #include @@ -21,18 +23,18 @@ { // Forward declarations of the needed Airy function implementations. 
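// The NVRTC fallback blocks above (cbrt, cos_pi) all share one shape:
// when the full header-only implementation cannot be compiled under
// NVRTC, forward to the CUDA device-math library instead, with a float
// overload so single precision is not promoted to double, and Policy
// overloads that simply ignore the policy argument. A condensed sketch
// of that shape; cos_pi_sketch is an illustrative name, while ::cospi
// and ::cospif are the CUDA math functions the real block delegates to:

template <typename T>
__device__ double cos_pi_sketch(T x) { return ::cospi(static_cast<double>(x)); }

__device__ inline float cos_pi_sketch(float x) { return ::cospif(x); }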
template - T airy_ai_imp(T x, const Policy& pol); + BOOST_MATH_GPU_ENABLED T airy_ai_imp(T x, const Policy& pol); template - T airy_bi_imp(T x, const Policy& pol); + BOOST_MATH_GPU_ENABLED T airy_bi_imp(T x, const Policy& pol); template - T airy_ai_prime_imp(T x, const Policy& pol); + BOOST_MATH_GPU_ENABLED T airy_ai_prime_imp(T x, const Policy& pol); template - T airy_bi_prime_imp(T x, const Policy& pol); + BOOST_MATH_GPU_ENABLED T airy_bi_prime_imp(T x, const Policy& pol); namespace airy_zero { template - T equation_as_10_4_105(const T& z, const Policy& pol) + BOOST_MATH_GPU_ENABLED T equation_as_10_4_105(const T& z, const Policy& pol) { const T one_over_z (T(1) / z); const T one_over_z_squared(one_over_z * one_over_z); @@ -54,7 +56,7 @@ namespace airy_ai_zero_detail { template - T initial_guess(const int m, const Policy& pol) + BOOST_MATH_GPU_ENABLED T initial_guess(const int m, const Policy& pol) { T guess; @@ -106,11 +108,19 @@ class function_object_ai_and_ai_prime { public: - explicit function_object_ai_and_ai_prime(const Policy& pol) : my_pol(pol) { } + BOOST_MATH_GPU_ENABLED explicit function_object_ai_and_ai_prime(const Policy& pol) : my_pol(pol) { } - function_object_ai_and_ai_prime(const function_object_ai_and_ai_prime&) = default; + #ifdef BOOST_MATH_ENABLE_CUDA + # pragma nv_diag_suppress 20012 + #endif - boost::math::tuple operator()(const T& x) const + BOOST_MATH_GPU_ENABLED function_object_ai_and_ai_prime(const function_object_ai_and_ai_prime&) = default; + + #ifdef BOOST_MATH_ENABLE_CUDA + # pragma nv_diag_default 20012 + #endif + + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(const T& x) const { // Return a tuple containing both Ai(x) and Ai'(x). return boost::math::make_tuple( @@ -127,7 +137,7 @@ namespace airy_bi_zero_detail { template - T initial_guess(const int m, const Policy& pol) + BOOST_MATH_GPU_ENABLED T initial_guess(const int m, const Policy& pol) { T guess; @@ -179,11 +189,19 @@ class function_object_bi_and_bi_prime { public: - explicit function_object_bi_and_bi_prime(const Policy& pol) : my_pol(pol) { } - - function_object_bi_and_bi_prime(const function_object_bi_and_bi_prime&) = default; - - boost::math::tuple operator()(const T& x) const + BOOST_MATH_GPU_ENABLED explicit function_object_bi_and_bi_prime(const Policy& pol) : my_pol(pol) { } + + #ifdef BOOST_MATH_ENABLE_CUDA + # pragma nv_diag_suppress 20012 + #endif + + BOOST_MATH_GPU_ENABLED function_object_bi_and_bi_prime(const function_object_bi_and_bi_prime&) = default; + + #ifdef BOOST_MATH_ENABLE_CUDA + # pragma nv_diag_default 20012 + #endif + + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(const T& x) const { // Return a tuple containing both Bi(x) and Bi'(x). return boost::math::make_tuple( diff --git a/include/boost/math/special_functions/detail/bessel_i0.hpp b/include/boost/math/special_functions/detail/bessel_i0.hpp index af6e8c3794..f2219cc940 100644 --- a/include/boost/math/special_functions/detail/bessel_i0.hpp +++ b/include/boost/math/special_functions/detail/bessel_i0.hpp @@ -1,5 +1,6 @@ // Copyright (c) 2006 Xiaogang Zhang // Copyright (c) 2017 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -14,6 +15,9 @@ #include #include #include +#include +#include +#include #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -35,24 +39,24 @@ namespace boost { namespace math { namespace detail{ template -T bessel_i0(const T& x); +BOOST_MATH_GPU_ENABLED T bessel_i0(const T& x); template -T bessel_i0_imp(const T&, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i0_imp(const T&, const boost::math::integral_constant&) { BOOST_MATH_ASSERT(0); return 0; } template -T bessel_i0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) { // Max error in interpolated form: 3.929e-08 // Max Error found at float precision = Poly: 1.991226e-07 - static const float P[] = { + BOOST_MATH_STATIC const float P[] = { 1.00000003928615375e+00f, 2.49999576572179639e-01f, 2.77785268558399407e-02f, @@ -70,7 +74,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) { // Max error in interpolated form: 5.195e-08 // Max Error found at float precision = Poly: 8.502534e-08 - static const float P[] = { + BOOST_MATH_STATIC const float P[] = { 3.98942651588301770e-01f, 4.98327234176892844e-02f, 2.91866904423115499e-02f, @@ -83,7 +87,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) { // Max error in interpolated form: 1.782e-09 // Max Error found at float precision = Poly: 6.473568e-08 - static const float P[] = { + BOOST_MATH_STATIC const float P[] = { 3.98942391532752700e-01f, 4.98455950638200020e-02f, 2.94835666900682535e-02f @@ -96,7 +100,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) } template -T bessel_i0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) @@ -104,7 +108,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Bessel I0 over[10 ^ -16, 7.75] // Max error in interpolated form : 3.042e-18 // Max Error found at double precision = Poly : 5.106609e-16 Cheb : 5.239199e-16 - static const double P[] = { + BOOST_MATH_STATIC const double P[] = { 1.00000000000000000e+00, 2.49999999999999909e-01, 2.77777777777782257e-02, @@ -128,7 +132,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) { // Max error in interpolated form : 1.685e-16 // Max Error found at double precision = Poly : 2.575063e-16 Cheb : 2.247615e+00 - static const double P[] = { + BOOST_MATH_STATIC const double P[] = { 3.98942280401425088e-01, 4.98677850604961985e-02, 2.80506233928312623e-02, @@ -158,7 +162,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) { // Max error in interpolated form : 2.437e-18 // Max Error found at double precision = Poly : 1.216719e-16 - static const double P[] = { + BOOST_MATH_STATIC const double P[] = { 3.98942280401432905e-01, 4.98677850491434560e-02, 2.80506308916506102e-02, @@ -173,7 +177,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) } template -T bessel_i0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) @@ -182,7 +186,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 3.899e-20 // Max Error found at float80 precision = Poly : 1.770840e-19 // LCOV_EXCL_START - static const T P[] = { + 
BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 9.99999999999999999961011629e-01), BOOST_MATH_BIG_CONSTANT(T, 64, 2.50000000000000001321873912e-01), BOOST_MATH_BIG_CONSTANT(T, 64, 2.77777777777777703400424216e-02), @@ -211,8 +215,8 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Maximum Relative Change in Control Points : 1.631e-04 // Max Error found at float80 precision = Poly : 7.811948e-21 // LCOV_EXCL_START - static const T Y = 4.051098823547363281250e-01f; - static const T P[] = { + BOOST_MATH_STATIC const T Y = 4.051098823547363281250e-01f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -6.158081780620616479492e-03), BOOST_MATH_BIG_CONSTANT(T, 64, 4.883635969834048766148e-02), BOOST_MATH_BIG_CONSTANT(T, 64, 7.892782002476195771920e-02), @@ -237,8 +241,8 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Maximum Relative Change in Control Points : 1.304e-03 // Max Error found at float80 precision = Poly : 2.303527e-20 // LCOV_EXCL_START - static const T Y = 4.033188819885253906250e-01f; - static const T P[] = { + BOOST_MATH_STATIC const T Y = 4.033188819885253906250e-01f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -4.376373876116109401062e-03), BOOST_MATH_BIG_CONSTANT(T, 64, 4.982899138682911273321e-02), BOOST_MATH_BIG_CONSTANT(T, 64, 3.109477529533515397644e-02), @@ -262,8 +266,8 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 1.035e-21 // Max Error found at float80 precision = Poly: 1.885872e-21 // LCOV_EXCL_START - static const T Y = 4.011702537536621093750e-01f; - static const T P[] = { + BOOST_MATH_STATIC const T Y = 4.011702537536621093750e-01f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -2.227973351806078464328e-03), BOOST_MATH_BIG_CONSTANT(T, 64, 4.986778486088017419036e-02), BOOST_MATH_BIG_CONSTANT(T, 64, 2.805066823812285310011e-02), @@ -291,7 +295,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 5.587e-20 // Max Error found at float80 precision = Poly : 8.776852e-20 // LCOV_EXCL_START - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 3.98942280401432677955074061e-01), BOOST_MATH_BIG_CONSTANT(T, 64, 4.98677850501789875615574058e-02), BOOST_MATH_BIG_CONSTANT(T, 64, 2.80506290908675604202206833e-02), @@ -320,7 +324,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) } template -T bessel_i0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) @@ -329,7 +333,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 1.274e-34 // Max Error found at float128 precision = Poly : 3.096091e-34 // LCOV_EXCL_START - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.0000000000000000000000000000000001273856e+00), BOOST_MATH_BIG_CONSTANT(T, 113, 2.4999999999999999999999999999999107477496e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 2.7777777777777777777777777777881795230918e-02), @@ -364,7 +368,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 7.534e-35 // Max Error found at float128 precision = Poly : 6.123912e-34 // LCOV_EXCL_START - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 9.9999999999999999992388573069504617493518e-01), 
BOOST_MATH_BIG_CONSTANT(T, 113, 2.5000000000000000007304739268173096975340e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 2.7777777777777777744261405400543564492074e-02), @@ -403,7 +407,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 1.808e-34 // Max Error found at float128 precision = Poly : 2.399403e-34 // LCOV_EXCL_START - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.9894228040870793650581242239624530714032e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 4.9867780576714783790784348982178607842250e-02), BOOST_MATH_BIG_CONSTANT(T, 113, 2.8051948347934462928487999569249907599510e-02), @@ -445,7 +449,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 1.487e-34 // Max Error found at float128 precision = Poly : 1.929924e-34 // LCOV_EXCL_START - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.9894228040143267793996798658172135362278e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 4.9867785050179084714910130342157246539820e-02), BOOST_MATH_BIG_CONSTANT(T, 113, 2.8050629090725751585266360464766768437048e-02), @@ -480,7 +484,7 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) // Max error in interpolated form : 5.459e-35 // Max Error found at float128 precision = Poly : 1.472240e-34 // LCOV_EXCL_START - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.9894228040143267793994605993438166526772e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 4.9867785050179084742493257495245185241487e-02), BOOST_MATH_BIG_CONSTANT(T, 113, 2.8050629090725735167652437695397756897920e-02), @@ -507,33 +511,33 @@ T bessel_i0_imp(const T& x, const std::integral_constant&) } template -T bessel_i0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i0_imp(const T& x, const boost::math::integral_constant&) { if(boost::math::tools::digits() <= 24) - return bessel_i0_imp(x, std::integral_constant()); + return bessel_i0_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 53) - return bessel_i0_imp(x, std::integral_constant()); + return bessel_i0_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 64) - return bessel_i0_imp(x, std::integral_constant()); + return bessel_i0_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 113) - return bessel_i0_imp(x, std::integral_constant()); + return bessel_i0_imp(x, boost::math::integral_constant()); BOOST_MATH_ASSERT(0); return 0; } template -inline T bessel_i0(const T& x) +BOOST_MATH_GPU_ENABLED inline T bessel_i0(const T& x) { - typedef std::integral_constant::digits == 0) || (std::numeric_limits::radix != 2)) ? + typedef boost::math::integral_constant::digits == 0) || (boost::math::numeric_limits::radix != 2)) ? 0 : - std::numeric_limits::digits <= 24 ? + boost::math::numeric_limits::digits <= 24 ? 24 : - std::numeric_limits::digits <= 53 ? + boost::math::numeric_limits::digits <= 53 ? 53 : - std::numeric_limits::digits <= 64 ? + boost::math::numeric_limits::digits <= 64 ? 64 : - std::numeric_limits::digits <= 113 ? + boost::math::numeric_limits::digits <= 113 ? 
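// [editor's note] The rendering of this diff strips everything between
// angle brackets, which is why the precision selector here reads as a
// bare chain of "digits <= N ?" clauses.  Below is a self-contained
// sketch of the dispatch pattern it implements, written against the
// standard-library names; the diff swaps those for boost::math:: shims
// so the same code also compiles under NVRTC, which ships no standard
// headers.  The exact template arguments are inferred from the visible
// 24/53/64/113 ladder, not quoted from the PR.
#include <limits>
#include <type_traits>

// One implementation per precision tier, selected at compile time:
template <class T, int N>
T bessel_like_imp(const T& x, const std::integral_constant<int, N>&)
{
   return x; // stand-in for the N-bit polynomial approximation
}

template <class T>
T bessel_like(const T& x)
{
   typedef std::integral_constant<int,
      ((std::numeric_limits<T>::digits == 0) || (std::numeric_limits<T>::radix != 2)) ?
      0 :
      std::numeric_limits<T>::digits <= 24 ? 24 :   // float
      std::numeric_limits<T>::digits <= 53 ? 53 :   // double
      std::numeric_limits<T>::digits <= 64 ? 64 :   // 80-bit long double
      std::numeric_limits<T>::digits <= 113 ? 113 : // quad precision
      -1> tag_type;
   return bessel_like_imp(x, tag_type()); // resolved with no runtime branch
}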
113 : -1 > tag_type; diff --git a/include/boost/math/special_functions/detail/bessel_i1.hpp b/include/boost/math/special_functions/detail/bessel_i1.hpp index badc35de0b..d2c750df06 100644 --- a/include/boost/math/special_functions/detail/bessel_i1.hpp +++ b/include/boost/math/special_functions/detail/bessel_i1.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2017 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -17,9 +18,13 @@ #pragma once #endif +#include #include #include #include +#include +#include +#include #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -38,24 +43,24 @@ namespace boost { namespace math { namespace detail{ template -T bessel_i1(const T& x); +BOOST_MATH_GPU_ENABLED T bessel_i1(const T& x); template -T bessel_i1_imp(const T&, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i1_imp(const T&, const boost::math::integral_constant&) { BOOST_MATH_ASSERT(0); return 0; } template -T bessel_i1_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) { //Max error in interpolated form : 1.348e-08 // Max Error found at float precision = Poly : 1.469121e-07 - static const float P[] = { + BOOST_MATH_STATIC const float P[] = { 8.333333221e-02f, 6.944453712e-03f, 3.472097211e-04f, @@ -74,7 +79,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 9.000e-08 // Max Error found at float precision = Poly: 1.044345e-07 - static const float P[] = { + BOOST_MATH_STATIC const float P[] = { 3.98942115977513013e-01f, -1.49581264836620262e-01f, -4.76475741878486795e-02f, @@ -89,7 +94,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) } template -T bessel_i1_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) @@ -98,7 +103,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 5.639e-17 // Max Error found at double precision = Poly: 1.795559e-16 - static const double P[] = { + BOOST_MATH_STATIC const double P[] = { 8.333333333333333803e-02, 6.944444444444341983e-03, 3.472222222225921045e-04, @@ -122,7 +127,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 1.796e-16 // Max Error found at double precision = Poly: 2.898731e-16 - static const double P[] = { + BOOST_MATH_STATIC const double P[] = { 3.989422804014406054e-01, -1.496033551613111533e-01, -4.675104253598537322e-02, @@ -152,7 +157,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) { // Max error in interpolated form: 1.320e-19 // Max Error found at double precision = Poly: 7.065357e-17 - static const double P[] = { + BOOST_MATH_STATIC const double P[] = { 3.989422804014314820e-01, -1.496033551467584157e-01, -4.675105322571775911e-02, @@ -167,7 +172,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) } template -T bessel_i1_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) @@ -175,7 +180,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Bessel I0 over[10 ^ -16, 7.75] // Max error in 
interpolated form: 8.086e-21 // Max Error found at float80 precision = Poly: 7.225090e-20 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 8.33333333333333333340071817e-02), BOOST_MATH_BIG_CONSTANT(T, 64, 6.94444444444444442462728070e-03), BOOST_MATH_BIG_CONSTANT(T, 64, 3.47222222222222318886683883e-04), @@ -203,7 +208,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Maximum Deviation Found : 3.887e-20 // Expected Error Term : 3.887e-20 // Maximum Relative Change in Control Points : 1.681e-04 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 3.98942260530218897338680e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -1.49599542849073670179540e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -4.70492865454119188276875e-02), @@ -236,7 +241,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Maximum Relative Change in Control Points : 2.101e-03 // Max Error found at float80 precision = Poly : 6.029974e-20 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 3.98942280401431675205845e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -1.49603355149968887210170e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -4.67510486284376330257260e-02), @@ -258,7 +263,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Bessel I0 over[100, INF] // Max error in interpolated form: 2.456e-20 // Max Error found at float80 precision = Poly: 5.446356e-20 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 3.98942280401432677958445e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -1.49603355150537411254359e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -4.67510484842456251368526e-02), @@ -276,7 +281,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) } template -T bessel_i1_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x < 7.75) @@ -285,7 +290,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 1.835e-35 // Max Error found at float128 precision = Poly: 1.645036e-34 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 8.3333333333333333333333333333333331804098e-02), BOOST_MATH_BIG_CONSTANT(T, 113, 6.9444444444444444444444444444445418303082e-03), BOOST_MATH_BIG_CONSTANT(T, 113, 3.4722222222222222222222222222119082346591e-04), @@ -321,7 +326,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Maximum Relative Change in Control Points : 5.204e-03 // Max Error found at float128 precision = Poly : 2.882561e-34 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 8.333333333333333326889717360850080939e-02), BOOST_MATH_BIG_CONSTANT(T, 113, 6.944444444444444511272790848815114507e-03), BOOST_MATH_BIG_CONSTANT(T, 113, 3.472222222222221892451965054394153443e-04), @@ -355,7 +360,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Maximum Deviation Found : 1.766e-35 // Expected Error Term : 1.021e-35 // Maximum Relative Change in Control Points : 6.228e-03 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 8.333333333333255774414858563409941233e-02), BOOST_MATH_BIG_CONSTANT(T, 113, 6.944444444444897867884955912228700291e-03), BOOST_MATH_BIG_CONSTANT(T, 113, 3.472222222220954970397343617150959467e-04), @@ -389,7 +394,7 @@ T bessel_i1_imp(const T& x, const 
std::integral_constant&) { // Max error in interpolated form: 8.864e-36 // Max Error found at float128 precision = Poly: 8.522841e-35 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.989422793693152031514179994954750043e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -1.496029423752889591425633234009799670e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -4.682975926820553021482820043377990241e-02), @@ -421,7 +426,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 6.028e-35 // Max Error found at float128 precision = Poly: 1.368313e-34 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.989422804012941975429616956496046931e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -1.496033550576049830976679315420681402e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -4.675107835141866009896710750800622147e-02), @@ -456,7 +461,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Max error in interpolated form: 5.494e-35 // Max Error found at float128 precision = Poly: 1.214651e-34 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.989422804014326779399307367861631577e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -1.496033551505372542086590873271571919e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -4.675104848454290286276466276677172664e-02), @@ -486,7 +491,7 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) // Bessel I0 over[100, INF] // Max error in interpolated form: 6.081e-35 // Max Error found at float128 precision = Poly: 1.407151e-34 - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 3.9894228040143267793994605993438200208417e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -1.4960335515053725422747977247811372936584e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -4.6751048484542891946087411826356811991039e-02), @@ -512,33 +517,33 @@ T bessel_i1_imp(const T& x, const std::integral_constant&) } template -T bessel_i1_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_i1_imp(const T& x, const boost::math::integral_constant&) { if(boost::math::tools::digits() <= 24) - return bessel_i1_imp(x, std::integral_constant()); + return bessel_i1_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 53) - return bessel_i1_imp(x, std::integral_constant()); + return bessel_i1_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 64) - return bessel_i1_imp(x, std::integral_constant()); + return bessel_i1_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 113) - return bessel_i1_imp(x, std::integral_constant()); + return bessel_i1_imp(x, boost::math::integral_constant()); BOOST_MATH_ASSERT(0); return 0; } template -inline T bessel_i1(const T& x) +inline BOOST_MATH_GPU_ENABLED T bessel_i1(const T& x) { - typedef std::integral_constant::digits == 0) || (std::numeric_limits::radix != 2)) ? + typedef boost::math::integral_constant::digits == 0) || (boost::math::numeric_limits::radix != 2)) ? 0 : - std::numeric_limits::digits <= 24 ? + boost::math::numeric_limits::digits <= 24 ? 24 : - std::numeric_limits::digits <= 53 ? + boost::math::numeric_limits::digits <= 53 ? 53 : - std::numeric_limits::digits <= 64 ? + boost::math::numeric_limits::digits <= 64 ? 64 : - std::numeric_limits::digits <= 113 ? + boost::math::numeric_limits::digits <= 113 ? 
113 : -1 > tag_type; diff --git a/include/boost/math/special_functions/detail/bessel_ik.hpp b/include/boost/math/special_functions/detail/bessel_ik.hpp index 0c653b4753..b3e7378fd4 100644 --- a/include/boost/math/special_functions/detail/bessel_ik.hpp +++ b/include/boost/math/special_functions/detail/bessel_ik.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2006 Xiaogang Zhang +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,14 +11,17 @@ #pragma once #endif -#include -#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include // Modified Bessel functions of the first and second kind of fractional order @@ -30,13 +34,13 @@ struct cyl_bessel_i_small_z { typedef T result_type; - cyl_bessel_i_small_z(T v_, T z_) : k(0), v(v_), mult(z_*z_/4) + BOOST_MATH_GPU_ENABLED cyl_bessel_i_small_z(T v_, T z_) : k(0), v(v_), mult(z_*z_/4) { BOOST_MATH_STD_USING term = 1; } - T operator()() + BOOST_MATH_GPU_ENABLED T operator()() { T result = term; ++k; @@ -52,7 +56,7 @@ struct cyl_bessel_i_small_z }; template -inline T bessel_i_small_z_series(T v, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T bessel_i_small_z_series(T v, T x, const Policy& pol) { BOOST_MATH_STD_USING T prefix; @@ -69,7 +73,7 @@ inline T bessel_i_small_z_series(T v, T x, const Policy& pol) return prefix; cyl_bessel_i_small_z s(v, x); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); T result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter); @@ -80,7 +84,7 @@ inline T bessel_i_small_z_series(T v, T x, const Policy& pol) // Calculate K(v, x) and K(v+1, x) by method analogous to // Temme, Journal of Computational Physics, vol 21, 343 (1976) template -int temme_ik(T v, T x, T* result_K, T* K1, const Policy& pol) +BOOST_MATH_GPU_ENABLED int temme_ik(T v, T x, T* result_K, T* K1, const Policy& pol) { T f, h, p, q, coef, sum, sum1, tolerance; T a, b, c, d, sigma, gamma1, gamma2; @@ -157,7 +161,7 @@ int temme_ik(T v, T x, T* result_K, T* K1, const Policy& pol) // Evaluate continued fraction fv = I_(v+1) / I_v, derived from // Abramowitz and Stegun, Handbook of Mathematical Functions, 1972, 9.1.73 template -int CF1_ik(T v, T x, T* fv, const Policy& pol) +BOOST_MATH_GPU_ENABLED int CF1_ik(T v, T x, T* fv, const Policy& pol) { T C, D, f, a, b, delta, tiny, tolerance; unsigned long k; @@ -204,7 +208,7 @@ int CF1_ik(T v, T x, T* fv, const Policy& pol) // z1 / z0 = U(v+1.5, 2v+1, 2x) / U(v+0.5, 2v+1, 2x), see // Thompson and Barnett, Computer Physics Communications, vol 47, 245 (1987) template -int CF2_ik(T v, T x, T* Kv, T* Kv1, const Policy& pol) +BOOST_MATH_GPU_ENABLED int CF2_ik(T v, T x, T* Kv, T* Kv1, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::constants; @@ -297,7 +301,7 @@ enum{ // Compute I(v, x) and K(v, x) simultaneously by Temme's method, see // Temme, Journal of Computational Physics, vol 19, 324 (1975) template -int bessel_ik(T v, T x, T* result_I, T* result_K, int kind, const Policy& pol) +BOOST_MATH_GPU_ENABLED int bessel_ik(T v, T x, T* result_I, T* result_K, int kind, const Policy& pol) { // Kv1 = K_(v+1), fv = I_(v+1) / I_v // Ku1 = K_(u+1), fu = I_(u+1) / I_u @@ -314,7 +318,7 @@ int bessel_ik(T v, T x, T* result_I, T* result_K, int kind, 
const Policy& pol) using namespace boost::math::tools; using namespace boost::math::constants; - static const char* function = "boost::math::bessel_ik<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::bessel_ik<%1%>(%1%,%1%)"; if (v < 0) { @@ -329,7 +333,7 @@ int bessel_ik(T v, T x, T* result_I, T* result_K, int kind, const Policy& pol) if (((kind & need_i) == 0) && (fabs(4 * v * v - 25) / (8 * x) < tools::forth_root_epsilon())) { // A&S 9.7.2 - Iv = std::numeric_limits::quiet_NaN(); // any value will do + Iv = boost::math::numeric_limits::quiet_NaN(); // any value will do T mu = 4 * v * v; T eight_z = 8 * x; Kv = 1 + (mu - 1) / eight_z + (mu - 1) * (mu - 9) / (2 * eight_z * eight_z) + (mu - 1) * (mu - 9) * (mu - 25) / (6 * eight_z * eight_z * eight_z); @@ -410,7 +414,7 @@ int bessel_ik(T v, T x, T* result_I, T* result_K, int kind, const Policy& pol) } } else - Iv = std::numeric_limits::quiet_NaN(); // any value will do + Iv = boost::math::numeric_limits::quiet_NaN(); // any value will do } if (reflect) { diff --git a/include/boost/math/special_functions/detail/bessel_j0.hpp b/include/boost/math/special_functions/detail/bessel_j0.hpp index 9a0b26fe6b..2df027b21d 100644 --- a/include/boost/math/special_functions/detail/bessel_j0.hpp +++ b/include/boost/math/special_functions/detail/bessel_j0.hpp @@ -10,6 +10,7 @@ #pragma once #endif +#include #include #include #include @@ -32,10 +33,10 @@ namespace boost { namespace math { namespace detail{ template -T bessel_j0(T x); +BOOST_MATH_GPU_ENABLED T bessel_j0(T x); template -T bessel_j0(T x) +BOOST_MATH_GPU_ENABLED T bessel_j0(T x) { #ifdef BOOST_MATH_INSTRUMENT static bool b = false; @@ -48,7 +49,7 @@ T bessel_j0(T x) } #endif - static const T P1[] = { + BOOST_MATH_STATIC const T P1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -4.1298668500990866786e+11)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.7282507878605942706e+10)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -6.2140700423540120665e+08)), @@ -57,7 +58,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0344222815443188943e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.2117036164593528341e-01)) }; - static const T Q1[] = { + BOOST_MATH_STATIC const T Q1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.3883787996332290397e+12)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.6328198300859648632e+10)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.3985097372263433271e+08)), @@ -66,7 +67,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.0)) }; - static const T P2[] = { + BOOST_MATH_STATIC const T P2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.8319397969392084011e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.2254078161378989535e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -7.2879702464464618998e+03)), @@ -76,7 +77,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.4321196680624245801e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.8591703355916499363e+01)) }; - static const T Q2[] = { + BOOST_MATH_STATIC const T Q2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.5783478026152301072e+05)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.4599102262586308984e+05)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.4055062591169562211e+04)), @@ -86,7 +87,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -2.5258076240801555057e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)) }; - static const T PC[] = { + BOOST_MATH_STATIC const T PC[] = { 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.2779090197304684302e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.1345386639580765797e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1170523380864944322e+04)), @@ -94,7 +95,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.5376201909008354296e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.8961548424210455236e-01)) }; - static const T QC[] = { + BOOST_MATH_STATIC const T QC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.2779090197304684318e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.1370412495510416640e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1215350561880115730e+04)), @@ -102,7 +103,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.5711159858080893649e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)) }; - static const T PS[] = { + BOOST_MATH_STATIC const T PS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.9226600200800094098e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.8591953644342993800e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.1183429920482737611e+02)), @@ -110,7 +111,7 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.2441026745835638459e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.8033303048680751817e-03)) }; - static const T QS[] = { + BOOST_MATH_STATIC const T QS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.7105024128512061905e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.1951131543434613647e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.2642780169211018836e+03)), @@ -118,12 +119,13 @@ T bessel_j0(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 9.0593769594993125859e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)) }; - static const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.4048255576957727686e+00)), - x2 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.5200781102863106496e+00)), - x11 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.160e+02)), - x12 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.42444230422723137837e-03)), - x21 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.4130e+03)), - x22 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.46860286310649596604e-04)); + + BOOST_MATH_STATIC const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.4048255576957727686e+00)); + BOOST_MATH_STATIC const T x2 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.5200781102863106496e+00)); + BOOST_MATH_STATIC const T x11 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.160e+02)); + BOOST_MATH_STATIC const T x12 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.42444230422723137837e-03)); + BOOST_MATH_STATIC const T x21 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.4130e+03)); + BOOST_MATH_STATIC const T x22 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.46860286310649596604e-04)); T value, factor, r, rc, rs; diff --git a/include/boost/math/special_functions/detail/bessel_j1.hpp b/include/boost/math/special_functions/detail/bessel_j1.hpp index 6d354dcce7..43df9fa0c1 100644 --- a/include/boost/math/special_functions/detail/bessel_j1.hpp +++ b/include/boost/math/special_functions/detail/bessel_j1.hpp @@ -10,6 +10,7 @@ #pragma once #endif +#include #include #include #include @@ -32,27 +33,29 @@ namespace boost { namespace math{ namespace detail{ template -T bessel_j1(T x); +BOOST_MATH_GPU_ENABLED T bessel_j1(T x); template struct bessel_j1_initializer { struct init { - init() + BOOST_MATH_GPU_ENABLED init() { do_init(); } - static void do_init() + BOOST_MATH_GPU_ENABLED static void do_init() { 
bessel_j1(T(1)); } - void force_instantiate()const{} + BOOST_MATH_GPU_ENABLED void force_instantiate()const{} }; - static const init initializer; - static void force_instantiate() + BOOST_MATH_STATIC const init initializer; + BOOST_MATH_GPU_ENABLED static void force_instantiate() { + #ifndef BOOST_MATH_HAS_GPU_SUPPORT initializer.force_instantiate(); + #endif } }; @@ -60,11 +63,11 @@ template const typename bessel_j1_initializer::init bessel_j1_initializer::initializer; template -T bessel_j1(T x) +BOOST_MATH_GPU_ENABLED T bessel_j1(T x) { bessel_j1_initializer::force_instantiate(); - static const T P1[] = { + BOOST_MATH_STATIC const T P1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.4258509801366645672e+11)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.6781041261492395835e+09)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.1548696764841276794e+08)), @@ -73,7 +76,7 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0650724020080236441e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.0767857011487300348e-02)) }; - static const T Q1[] = { + BOOST_MATH_STATIC const T Q1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.1868604460820175290e+12)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.2091902282580133541e+10)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.0228375140097033958e+08)), @@ -82,7 +85,7 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.0)) }; - static const T P2[] = { + BOOST_MATH_STATIC const T P2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.7527881995806511112e+16)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.6608531731299018674e+15)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.6658018905416665164e+13)), @@ -92,7 +95,7 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -7.5023342220781607561e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.6179191852758252278e+00)) }; - static const T Q2[] = { + BOOST_MATH_STATIC const T Q2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.7253905888447681194e+18)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.7128800897135812012e+16)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.4899346165481429307e+13)), @@ -102,7 +105,7 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.3886978985861357615e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)) }; - static const T PC[] = { + BOOST_MATH_STATIC const T PC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -4.4357578167941278571e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -9.9422465050776411957e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -6.6033732483649391093e+06)), @@ -111,7 +114,7 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.6116166443246101165e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.0)) }; - static const T QC[] = { + BOOST_MATH_STATIC const T QC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -4.4357578167941278568e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -9.9341243899345856590e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -6.5853394797230870728e+06)), @@ -120,7 +123,7 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.4550094401904961825e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)) }; - static const T PS[] = { + BOOST_MATH_STATIC const T PS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.3220913409857223519e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.5145160675335701966e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.6178836581270835179e+04)), @@ -129,7 +132,7 
@@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.5265133846636032186e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.0)) }; - static const T QS[] = { + BOOST_MATH_STATIC const T QS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.0871281941028743574e+05)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.8194580422439972989e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.4194606696037208929e+06)), @@ -138,12 +141,13 @@ T bessel_j1(T x) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.6383677696049909675e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)) }; - static const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.8317059702075123156e+00)), - x2 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.0155866698156187535e+00)), - x11 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 9.810e+02)), - x12 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.2527979248768438556e-04)), - x21 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.7960e+03)), - x22 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.8330184381246462950e-05)); + + BOOST_MATH_STATIC const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.8317059702075123156e+00)); + BOOST_MATH_STATIC const T x2 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.0155866698156187535e+00)); + BOOST_MATH_STATIC const T x11 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 9.810e+02)); + BOOST_MATH_STATIC const T x12 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.2527979248768438556e-04)); + BOOST_MATH_STATIC const T x21 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.7960e+03)); + BOOST_MATH_STATIC const T x22 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.8330184381246462950e-05)); T value, factor, r, rc, rs, w; diff --git a/include/boost/math/special_functions/detail/bessel_jn.hpp b/include/boost/math/special_functions/detail/bessel_jn.hpp index a08af05485..73bc0c5621 100644 --- a/include/boost/math/special_functions/detail/bessel_jn.hpp +++ b/include/boost/math/special_functions/detail/bessel_jn.hpp @@ -10,6 +10,10 @@ #pragma once #endif +#include +#include +#include +#include #include #include #include @@ -24,7 +28,7 @@ namespace boost { namespace math { namespace detail{ template -T bessel_jn(int n, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T bessel_jn(int n, T x, const Policy& pol) { T value(0), factor, current, prev, next; diff --git a/include/boost/math/special_functions/detail/bessel_jy.hpp b/include/boost/math/special_functions/detail/bessel_jy.hpp index 90e099eb77..143dce872c 100644 --- a/include/boost/math/special_functions/detail/bessel_jy.hpp +++ b/include/boost/math/special_functions/detail/bessel_jy.hpp @@ -11,16 +11,18 @@ #endif #include +#include +#include #include #include #include #include #include +#include #include #include #include #include -#include // Bessel functions of the first and second kind of fractional order @@ -38,7 +40,7 @@ namespace boost { namespace math { // try it and see... 
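// [editor's note] For the reader: hankel_PQ below computes the P and Q
// sums of the standard large-x asymptotic form (A&S section 9.2; the
// exact equation numbers are hedged from memory):
//
//    J_v(x) ~ sqrt(2/(pi x)) * (P(v,x) cos(chi) - Q(v,x) sin(chi))
//    Y_v(x) ~ sqrt(2/(pi x)) * (P(v,x) sin(chi) + Q(v,x) cos(chi))
//    with chi = x - (v/2 + 1/4) * pi,
//
// writing the truncated sums through *p and *q.  Judging by the call
// site further down ("(x > 8) && hankel_PQ(...)"), the bool return
// reports whether the expansion actually converged at this x.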
// template - bool hankel_PQ(T v, T x, T* p, T* q, const Policy& ) + BOOST_MATH_GPU_ENABLED bool hankel_PQ(T v, T x, T* p, T* q, const Policy& ) { BOOST_MATH_STD_USING T tolerance = 2 * policies::get_epsilon(); @@ -70,7 +72,7 @@ namespace boost { namespace math { // Calculate Y(v, x) and Y(v+1, x) by Temme's method, see // Temme, Journal of Computational Physics, vol 21, 343 (1976) template - int temme_jy(T v, T x, T* Y, T* Y1, const Policy& pol) + BOOST_MATH_GPU_ENABLED int temme_jy(T v, T x, T* Y, T* Y1, const Policy& pol) { T g, h, p, q, f, coef, sum, sum1, tolerance; T a, d, e, sigma; @@ -139,7 +141,7 @@ namespace boost { namespace math { // Evaluate continued fraction fv = J_(v+1) / J_v, see // Abramowitz and Stegun, Handbook of Mathematical Functions, 1972, 9.1.73 template - int CF1_jy(T v, T x, T* fv, int* sign, const Policy& pol) + BOOST_MATH_GPU_ENABLED int CF1_jy(T v, T x, T* fv, int* sign, const Policy& pol) { T C, D, f, a, b, delta, tiny, tolerance; unsigned long k; @@ -185,7 +187,7 @@ namespace boost { namespace math { // real values only. // template - int CF2_jy(T v, T x, T* p, T* q, const Policy& pol) + BOOST_MATH_GPU_ENABLED int CF2_jy(T v, T x, T* p, T* q, const Policy& pol) { BOOST_MATH_STD_USING @@ -254,13 +256,13 @@ namespace boost { namespace math { return 0; } - static const int need_j = 1; - static const int need_y = 2; + BOOST_MATH_STATIC const int need_j = 1; + BOOST_MATH_STATIC const int need_y = 2; // Compute J(v, x) and Y(v, x) simultaneously by Steed's method, see // Barnett et al, Computer Physics Communications, vol 8, 377 (1974) template - int bessel_jy(T v, T x, T* J, T* Y, int kind, const Policy& pol) + BOOST_MATH_GPU_ENABLED int bessel_jy(T v, T x, T* J, T* Y, int kind, const Policy& pol) { BOOST_MATH_ASSERT(x >= 0); @@ -273,7 +275,7 @@ namespace boost { namespace math { T cp = 0; T sp = 0; - static const char* function = "boost::math::bessel_jy<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::bessel_jy<%1%>(%1%,%1%)"; BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -284,7 +286,7 @@ namespace boost { namespace math { reflect = true; v = -v; // v is non-negative from here } - if (v > static_cast((std::numeric_limits::max)())) + if (v > static_cast((boost::math::numeric_limits::max)())) { *J = *Y = policies::raise_evaluation_error(function, "Order of Bessel function is too large to evaluate: got %1%", v, pol); return 1; // LCOV_EXCL_LINE previous line will throw. @@ -310,10 +312,10 @@ namespace boost { namespace math { else if(kind & need_j) *J = policies::raise_domain_error(function, "Value of Bessel J_v(x) is complex-infinity at %1%", x, pol); // complex infinity else - *J = std::numeric_limits::quiet_NaN(); // LCOV_EXCL_LINE, we should never get here, any value will do, not using J. + *J = boost::math::numeric_limits::quiet_NaN(); // LCOV_EXCL_LINE, we should never get here, any value will do, not using J. if((kind & need_y) == 0) - *Y = std::numeric_limits::quiet_NaN(); // any value will do, not using Y. + *Y = boost::math::numeric_limits::quiet_NaN(); // any value will do, not using Y. 
         else
         {
            // We should never get here:
@@ -333,7 +335,7 @@ namespace boost { namespace math {
            // and divergent which leads to large errors :-(
            //
            Jv = bessel_j_small_z_series(v, x, pol);
-           Yv = std::numeric_limits<T>::quiet_NaN();
+           Yv = boost::math::numeric_limits<T>::quiet_NaN();
         }
         else if((x < 1) && (u != 0) && (log(policies::get_epsilon<T, Policy>() / 2) > v * log((x/2) * (x/2) / v)))
         {
@@ -344,7 +346,7 @@ namespace boost { namespace math {
            if(kind&need_j)
               Jv = bessel_j_small_z_series(v, x, pol);
            else
-              Jv = std::numeric_limits<T>::quiet_NaN();
+              Jv = boost::math::numeric_limits<T>::quiet_NaN();
            if((org_kind&need_y && (!reflect || (cp != 0))) || (org_kind & need_j && (reflect && (sp != 0))))
            {
@@ -352,7 +354,7 @@ namespace boost { namespace math {
               Yv = bessel_y_small_z_series(v, x, &Yv_scale, pol);
            }
            else
-              Yv = std::numeric_limits<T>::quiet_NaN();
+              Yv = boost::math::numeric_limits<T>::quiet_NaN();
         }
         else if((u == 0) && (x < policies::get_epsilon<T, Policy>()))
         {
@@ -363,7 +365,7 @@ namespace boost { namespace math {
            if(kind&need_j)
               Jv = bessel_j_small_z_series(v, x, pol);
            else
-              Jv = std::numeric_limits<T>::quiet_NaN();
+              Jv = boost::math::numeric_limits<T>::quiet_NaN();
            if((org_kind&need_y && (!reflect || (cp != 0))) || (org_kind & need_j && (reflect && (sp != 0))))
            {
@@ -371,7 +373,7 @@ namespace boost { namespace math {
               Yv = bessel_yn_small_z(n, x, &Yv_scale, pol);
            }
            else
-              Yv = std::numeric_limits<T>::quiet_NaN();
+              Yv = boost::math::numeric_limits<T>::quiet_NaN();
            // LCOV_EXCL_STOP
         }
         else if(asymptotic_bessel_large_x_limit(v, x))
@@ -381,13 +383,13 @@ namespace boost { namespace math {
               Yv = asymptotic_bessel_y_large_x_2(v, x, pol);
            }
            else
-              Yv = std::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
+              Yv = boost::math::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
            if(kind&need_j)
            {
               Jv = asymptotic_bessel_j_large_x_2(v, x, pol);
            }
            else
-              Jv = std::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
+              Jv = boost::math::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
         }
         else if((x > 8) && hankel_PQ(v, x, &p, &q, pol))
         {
@@ -449,7 +451,7 @@ namespace boost { namespace math {
               Jv = scale * W / (Yv * fv - Yv1); // Wronskian relation
            }
            else
-              Jv = std::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
+              Jv = boost::math::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
            Yv_scale = scale;
         }
         else // x in (2, \infty)
@@ -564,7 +566,7 @@ namespace boost { namespace math {
               Yv = prev;
            }
            else
-              Yv = std::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
+              Yv = boost::math::numeric_limits<T>::quiet_NaN(); // any value will do, we're not using it.
         }
         if (reflect)
diff --git a/include/boost/math/special_functions/detail/bessel_jy_asym.hpp b/include/boost/math/special_functions/detail/bessel_jy_asym.hpp
index cb09b202d5..51e4efafca 100644
--- a/include/boost/math/special_functions/detail/bessel_jy_asym.hpp
+++ b/include/boost/math/special_functions/detail/bessel_jy_asym.hpp
@@ -16,12 +16,15 @@
 #pragma once
 #endif

+#include
+#include
 #include
+#include

 namespace boost{ namespace math{ namespace detail{

 template <class T>
-inline T asymptotic_bessel_amplitude(T v, T x)
+BOOST_MATH_GPU_ENABLED inline T asymptotic_bessel_amplitude(T v, T x)
 {
    // Calculate the amplitude of J(v, x) and Y(v, x) for large
    // x: see A&S 9.2.28.
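// [editor's note] Context for the amplitude/phase pair implemented in
// this header: writing J_v(x) = M_v(x) cos(theta_v(x)) and
// Y_v(x) = M_v(x) sin(theta_v(x)) gives the modulus
//
//    M_v(x)^2 = J_v(x)^2 + Y_v(x)^2,  with M_v(x)^2 -> 2/(pi x) as x -> inf,
//
// and asymptotic_bessel_phase_mx below returns theta_v(x) - x, which is
// O(1) for large x and so sidesteps the cancellation that computing
// theta_v directly would suffer.  (Reading inferred from the function
// names and comments; the full bodies fall outside these hunks.)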
@@ -39,7 +42,7 @@ inline T asymptotic_bessel_amplitude(T v, T x)
 }

 template <class T>
-T asymptotic_bessel_phase_mx(T v, T x)
+BOOST_MATH_GPU_ENABLED T asymptotic_bessel_phase_mx(T v, T x)
 {
    //
    // Calculate the phase of J(v, x) and Y(v, x) for large x.
@@ -63,7 +66,7 @@ T asymptotic_bessel_phase_mx(T v, T x)
 }

 template <class T, class Policy>
-inline T asymptotic_bessel_y_large_x_2(T v, T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T asymptotic_bessel_y_large_x_2(T v, T x, const Policy& pol)
 {
    // See A&S 9.2.19.
    BOOST_MATH_STD_USING
@@ -93,7 +96,7 @@ inline T asymptotic_bessel_y_large_x_2(T v, T x, const Policy& pol)
 }

 template <class T, class Policy>
-inline T asymptotic_bessel_j_large_x_2(T v, T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T asymptotic_bessel_j_large_x_2(T v, T x, const Policy& pol)
 {
    // See A&S 9.2.19.
    BOOST_MATH_STD_USING
@@ -124,7 +127,7 @@ inline T asymptotic_bessel_j_large_x_2(T v, T x, const Policy& pol)
 }

 template <class T>
-inline bool asymptotic_bessel_large_x_limit(int v, const T& x)
+BOOST_MATH_GPU_ENABLED inline bool asymptotic_bessel_large_x_limit(int v, const T& x)
 {
    BOOST_MATH_STD_USING
    //
@@ -142,7 +145,7 @@ inline bool asymptotic_bessel_large_x_limit(const T& v, const T& x)
 }

 template <class T>
-inline bool asymptotic_bessel_large_x_limit(const T& v, const T& x)
+BOOST_MATH_GPU_ENABLED inline bool asymptotic_bessel_large_x_limit(const T& v, const T& x)
 {
    BOOST_MATH_STD_USING
    //
@@ -155,11 +158,11 @@ inline bool asymptotic_bessel_large_x_limit(const T& v, const T& x)
    // error rates either side of the divide for v < 10000.
    // At double precision eps^1/8 ~= 0.01.
    //
-   return (std::max)(T(fabs(v)), T(1)) < x * sqrt(tools::forth_root_epsilon<T>());
+   return BOOST_MATH_GPU_SAFE_MAX(T(fabs(v)), T(1)) < x * sqrt(tools::forth_root_epsilon<T>());
 }

 template <class T, class Policy>
-void temme_asymptotic_y_small_x(T v, T x, T* Y, T* Y1, const Policy& pol)
+BOOST_MATH_GPU_ENABLED void temme_asymptotic_y_small_x(T v, T x, T* Y, T* Y1, const Policy& pol)
 {
    T c = 1;
    T p = (v / boost::math::sin_pi(v, pol)) * pow(x / 2, -v) / boost::math::tgamma(1 - v, pol);
@@ -193,7 +196,7 @@ void temme_asymptotic_y_small_x(T v, T x, T* Y, T* Y1, const Policy& pol)
 }

 template <class T, class Policy>
-T asymptotic_bessel_i_large_x(T v, T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T asymptotic_bessel_i_large_x(T v, T x, const Policy& pol)
 {
    BOOST_MATH_STD_USING  // ADL of std names
    T s = 1;
diff --git a/include/boost/math/special_functions/detail/bessel_jy_series.hpp b/include/boost/math/special_functions/detail/bessel_jy_series.hpp
index db46f36400..5c083f3483 100644
--- a/include/boost/math/special_functions/detail/bessel_jy_series.hpp
+++ b/include/boost/math/special_functions/detail/bessel_jy_series.hpp
@@ -10,10 +10,9 @@
 #pragma once
 #endif

-#include
-#include
 #include
 #include
+#include

 namespace boost { namespace math { namespace detail{

@@ -22,7 +21,7 @@ struct bessel_j_small_z_series_term
 {
    typedef T result_type;

-   bessel_j_small_z_series_term(T v_, T x)
+   BOOST_MATH_GPU_ENABLED bessel_j_small_z_series_term(T v_, T x)
       : N(0), v(v_)
    {
       BOOST_MATH_STD_USING
@@ -30,7 +29,7 @@ struct bessel_j_small_z_series_term
       mult *= -mult;
       term = 1;
    }
-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
    {
       T r = term;
       ++N;
@@ -49,7 +48,7 @@
 // Converges rapidly for all z << v.
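// [editor's note] A minimal stand-alone rendering of the series summed
// by bessel_j_small_z_series below, using plain std:: names so it runs
// outside Boost.  The recurrence matches the term functor above
// (mult = -z*z/4, term_{N+1} = term_N * mult / (N * (N + v))); the
// tolerance and iteration handling are illustrative stand-ins for the
// policy-driven values in the real code.
#include <cmath>
#include <cstdio>

double bessel_j_small_z(double v, double z)
{
   double prefix = std::pow(z / 2, v) / std::tgamma(v + 1); // (z/2)^v / Gamma(v+1)
   double mult = -(z / 2) * (z / 2);                        // -z^2/4
   double term = 1, sum = 0;
   for (unsigned N = 1; std::fabs(term) > 1e-17 * std::fabs(sum); ++N)
   {
      sum += term;
      term *= mult / (N * (N + v)); // next term of sum_k (-z^2/4)^k / (k! (v+1)_k)
   }
   return prefix * sum;
}

int main()
{
   std::printf("%.15f\n", bessel_j_small_z(0.0, 1.0)); // ~0.765197686557967 = J_0(1)
}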
// template -inline T bessel_j_small_z_series(T v, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T bessel_j_small_z_series(T v, T x, const Policy& pol) { BOOST_MATH_STD_USING T prefix; @@ -66,7 +65,7 @@ inline T bessel_j_small_z_series(T v, T x, const Policy& pol) return prefix; bessel_j_small_z_series_term s(v, x); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); T result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter); @@ -79,7 +78,7 @@ struct bessel_y_small_z_series_term_a { typedef T result_type; - bessel_y_small_z_series_term_a(T v_, T x) + BOOST_MATH_GPU_ENABLED bessel_y_small_z_series_term_a(T v_, T x) : N(0), v(v_) { BOOST_MATH_STD_USING @@ -87,7 +86,7 @@ struct bessel_y_small_z_series_term_a mult *= -mult; term = 1; } - T operator()() + BOOST_MATH_GPU_ENABLED T operator()() { BOOST_MATH_STD_USING T r = term; @@ -107,7 +106,7 @@ struct bessel_y_small_z_series_term_b { typedef T result_type; - bessel_y_small_z_series_term_b(T v_, T x) + BOOST_MATH_GPU_ENABLED bessel_y_small_z_series_term_b(T v_, T x) : N(0), v(v_) { BOOST_MATH_STD_USING @@ -115,7 +114,7 @@ struct bessel_y_small_z_series_term_b mult *= -mult; term = 1; } - T operator()() + BOOST_MATH_GPU_ENABLED T operator()() { T r = term; ++N; @@ -138,10 +137,10 @@ struct bessel_y_small_z_series_term_b // eps/2 * v^v(x/2)^-v > (x/2)^v or log(eps/2) > v log((x/2)^2/v) // template -inline T bessel_y_small_z_series(T v, T x, T* pscale, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T bessel_y_small_z_series(T v, T x, T* pscale, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "bessel_y_small_z_series<%1%>(%1%,%1%)"; + constexpr auto function = "bessel_y_small_z_series<%1%>(%1%,%1%)"; T prefix; T gam; T p = log(x / 2); @@ -183,7 +182,7 @@ inline T bessel_y_small_z_series(T v, T x, T* pscale, const Policy& pol) prefix = -exp(prefix); } bessel_y_small_z_series_term_a s(v, x); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); *pscale = scale; T result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon(), max_iter); @@ -211,7 +210,7 @@ inline T bessel_y_small_z_series(T v, T x, T* pscale, const Policy& pol) } template -T bessel_yn_small_z(int n, T z, T* scale, const Policy& pol) +BOOST_MATH_GPU_ENABLED T bessel_yn_small_z(int n, T z, T* scale, const Policy& pol) { // // See http://functions.wolfram.com/Bessel-TypeFunctions/BesselY/06/01/04/01/02/ diff --git a/include/boost/math/special_functions/detail/bessel_jy_zero.hpp b/include/boost/math/special_functions/detail/bessel_jy_zero.hpp index cb1fc48d83..15671c0df7 100644 --- a/include/boost/math/special_functions/detail/bessel_jy_zero.hpp +++ b/include/boost/math/special_functions/detail/bessel_jy_zero.hpp @@ -18,19 +18,30 @@ #ifndef BOOST_MATH_BESSEL_JY_ZERO_2013_01_18_HPP_ #define BOOST_MATH_BESSEL_JY_ZERO_2013_01_18_HPP_ - #include + #include + #include + #include + #include + #include #include - #include #include #include + #ifndef BOOST_MATH_HAS_NVRTC + #include + #endif + + #ifdef BOOST_MATH_ENABLE_CUDA + # pragma nv_diag_suppress 20012 + #endif + namespace boost { namespace math { namespace detail { namespace bessel_zero { template - T equation_nist_10_21_19(const T& v, const T& a) + BOOST_MATH_GPU_ENABLED T equation_nist_10_21_19(const T& v, const T& a) { // Get the initial estimate of the m'th root of Jv or 
Yv. // This subroutine is used for the order m with m > 1. @@ -57,11 +68,11 @@ class equation_as_9_3_39_and_its_derivative { public: - explicit equation_as_9_3_39_and_its_derivative(const T& zt) : zeta(zt) { } + BOOST_MATH_GPU_ENABLED explicit equation_as_9_3_39_and_its_derivative(const T& zt) : zeta(zt) { } - equation_as_9_3_39_and_its_derivative(const equation_as_9_3_39_and_its_derivative&) = default; + BOOST_MATH_GPU_ENABLED equation_as_9_3_39_and_its_derivative(const equation_as_9_3_39_and_its_derivative&) = default; - boost::math::tuple operator()(const T& z) const + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(const T& z) const { BOOST_MATH_STD_USING // ADL of std names, needed for acos, sqrt. @@ -86,7 +97,7 @@ }; template - static T equation_as_9_5_26(const T& v, const T& ai_bi_root, const Policy& pol) + BOOST_MATH_GPU_ENABLED T equation_as_9_5_26(const T& v, const T& ai_bi_root, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for log, sqrt. @@ -132,9 +143,9 @@ // Select the maximum allowed iterations based on the number // of decimal digits in the numeric type T, being at least 12. - const auto iterations_allowed = static_cast((std::max)(12, my_digits10 * 2)); + const auto iterations_allowed = static_cast(BOOST_MATH_GPU_SAFE_MAX(12, my_digits10 * 2)); - std::uintmax_t iterations_used = iterations_allowed; + boost::math::uintmax_t iterations_used = iterations_allowed; // Calculate the root of z as a function of zeta. const T z = boost::math::tools::newton_raphson_iterate( @@ -142,7 +153,7 @@ z_estimate, range_zmin, range_zmax, - (std::min)(boost::math::tools::digits(), boost::math::tools::digits()), + BOOST_MATH_GPU_SAFE_MIN(boost::math::tools::digits(), boost::math::tools::digits()), iterations_used); static_cast(iterations_used); @@ -168,7 +179,7 @@ namespace cyl_bessel_j_zero_detail { template - T equation_nist_10_21_40_a(const T& v, const Policy& pol) + BOOST_MATH_GPU_ENABLED T equation_nist_10_21_40_a(const T& v, const Policy& pol) { const T v_pow_third(boost::math::cbrt(v, pol)); const T v_pow_minus_two_thirds(T(1) / (v_pow_third * v_pow_third)); @@ -185,13 +196,13 @@ class function_object_jv { public: - function_object_jv(const T& v, + BOOST_MATH_GPU_ENABLED function_object_jv(const T& v, const Policy& pol) : my_v(v), my_pol(pol) { } - function_object_jv(const function_object_jv&) = default; + BOOST_MATH_GPU_ENABLED function_object_jv(const function_object_jv&) = default; - T operator()(const T& x) const + BOOST_MATH_GPU_ENABLED T operator()(const T& x) const { return boost::math::cyl_bessel_j(my_v, x, my_pol); } @@ -206,15 +217,16 @@ class function_object_jv_and_jv_prime { public: - function_object_jv_and_jv_prime(const T& v, - const bool order_is_zero, - const Policy& pol) : my_v(v), + BOOST_MATH_GPU_ENABLED function_object_jv_and_jv_prime( + const T& v, + const bool order_is_zero, + const Policy& pol) : my_v(v), my_order_is_zero(order_is_zero), my_pol(pol) { } function_object_jv_and_jv_prime(const function_object_jv_and_jv_prime&) = default; - boost::math::tuple operator()(const T& x) const + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(const T& x) const { // Obtain Jv(x) and Jv'(x). 
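// [editor's note] Sketch of the pattern this functor feeds: a callable
// returning the (f, f') pair that boost::math::tools::newton_raphson_iterate
// consumes, with the derivative supplied by the identity
// J_v'(x) = J_{v-1}(x) - (v/x) J_v(x), which reduces to J_0' = -J_1 for
// the zero-order case flagged by `order_is_zero`.  This is a public-API
// stand-in, not the detail-layer code the PR touches:
#include <boost/math/special_functions/bessel.hpp>
#include <boost/math/tools/roots.hpp>
#include <cstdint>
#include <utility>

int main()
{
   auto f = [](double x)
   {
      double j0 = boost::math::cyl_bessel_j(0, x);
      double j1 = boost::math::cyl_bessel_j(1, x);
      return std::make_pair(j0, -j1); // (J_0(x), J_0'(x)) since J_0' = -J_1
   };
   std::uintmax_t max_iter = 20;
   double root = boost::math::tools::newton_raphson_iterate(
      f, /*guess*/ 2.4, /*min*/ 2.0, /*max*/ 3.0, /*digits*/ 50, max_iter);
   // root ~= 2.404825557695773, the first zero of J_0.
   (void)root;
}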
// Chris's original code called the Bessel function implementation layer direct, @@ -246,10 +258,10 @@ const function_object_jv_and_jv_prime& operator=(const function_object_jv_and_jv_prime&) = delete; }; - template bool my_bisection_unreachable_tolerance(const T&, const T&) { return false; } + template BOOST_MATH_GPU_ENABLED bool my_bisection_unreachable_tolerance(const T&, const T&) { return false; } template - T initial_guess(const T& v, const int m, const Policy& pol) + BOOST_MATH_GPU_ENABLED T initial_guess(const T& v, const int m, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for floor. @@ -325,7 +337,7 @@ } // Perform several steps of bisection iteration to refine the guess. - std::uintmax_t number_of_iterations(12U); + boost::math::uintmax_t number_of_iterations(12U); // Do the bisection iteration. const boost::math::tuple guess_pair = @@ -390,7 +402,7 @@ namespace cyl_neumann_zero_detail { template - T equation_nist_10_21_40_b(const T& v, const Policy& pol) + BOOST_MATH_GPU_ENABLED T equation_nist_10_21_40_b(const T& v, const Policy& pol) { const T v_pow_third(boost::math::cbrt(v, pol)); const T v_pow_minus_two_thirds(T(1) / (v_pow_third * v_pow_third)); @@ -407,13 +419,13 @@ class function_object_yv { public: - function_object_yv(const T& v, - const Policy& pol) : my_v(v), - my_pol(pol) { } + BOOST_MATH_GPU_ENABLED function_object_yv(const T& v, + const Policy& pol) : my_v(v), + my_pol(pol) { } - function_object_yv(const function_object_yv&) = default; + BOOST_MATH_GPU_ENABLED function_object_yv(const function_object_yv&) = default; - T operator()(const T& x) const + BOOST_MATH_GPU_ENABLED T operator()(const T& x) const { return boost::math::cyl_neumann(my_v, x, my_pol); } @@ -428,13 +440,13 @@ class function_object_yv_and_yv_prime { public: - function_object_yv_and_yv_prime(const T& v, - const Policy& pol) : my_v(v), - my_pol(pol) { } + BOOST_MATH_GPU_ENABLED function_object_yv_and_yv_prime(const T& v, + const Policy& pol) : my_v(v), + my_pol(pol) { } - function_object_yv_and_yv_prime(const function_object_yv_and_yv_prime&) = default; + BOOST_MATH_GPU_ENABLED function_object_yv_and_yv_prime(const function_object_yv_and_yv_prime&) = default; - boost::math::tuple operator()(const T& x) const + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(const T& x) const { const T half_epsilon(boost::math::tools::epsilon() / 2U); @@ -469,10 +481,10 @@ const function_object_yv_and_yv_prime& operator=(const function_object_yv_and_yv_prime&) = delete; }; - template bool my_bisection_unreachable_tolerance(const T&, const T&) { return false; } + template BOOST_MATH_GPU_ENABLED bool my_bisection_unreachable_tolerance(const T&, const T&) { return false; } template - T initial_guess(const T& v, const int m, const Policy& pol) + BOOST_MATH_GPU_ENABLED T initial_guess(const T& v, const int m, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names, needed for floor. @@ -560,7 +572,7 @@ } // Perform several steps of bisection iteration to refine the guess. - std::uintmax_t number_of_iterations(12U); + boost::math::uintmax_t number_of_iterations(12U); // Do the bisection iteration. 
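// [editor's note] What the fixed-iteration refinement below amounts to,
// expressed against the public API: my_bisection_unreachable_tolerance
// always returns false, so bisect() runs exactly number_of_iterations
// halvings and the midpoint of the final bracket becomes the polished
// initial guess.  Illustrative stand-in:
#include <boost/math/special_functions/bessel.hpp>
#include <boost/math/tools/roots.hpp>
#include <cstdint>
#include <utility>

int main()
{
   auto f = [](double x) { return boost::math::cyl_bessel_j(0, x); };
   auto unreachable_tol = [](double, double) { return false; }; // never "converged"
   std::uintmax_t number_of_iterations = 12;
   std::pair<double, double> guess_pair =
      boost::math::tools::bisect(f, 2.0, 3.0, unreachable_tol, number_of_iterations);
   double guess = (guess_pair.first + guess_pair.second) / 2; // ~ first zero of J_0
   (void)guess;
}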
const boost::math::tuple guess_pair = @@ -624,4 +636,8 @@ } // namespace bessel_zero } } } // namespace boost::math::detail + #ifdef BOOST_MATH_ENABLE_CUDA + # pragma nv_diag_default 20012 + #endif + #endif // BOOST_MATH_BESSEL_JY_ZERO_2013_01_18_HPP_ diff --git a/include/boost/math/special_functions/detail/bessel_k0.hpp b/include/boost/math/special_functions/detail/bessel_k0.hpp index f29ffa75c4..bab202b6cd 100644 --- a/include/boost/math/special_functions/detail/bessel_k0.hpp +++ b/include/boost/math/special_functions/detail/bessel_k0.hpp @@ -13,10 +13,14 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) #endif +#include +#include +#include +#include #include #include -#include #include +#include #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -44,35 +48,37 @@ namespace boost { namespace math { namespace detail{ template -T bessel_k0(const T& x); +BOOST_MATH_GPU_ENABLED T bessel_k0(const T& x); template struct bessel_k0_initializer { struct init { - init() + BOOST_MATH_GPU_ENABLED init() { do_init(tag()); } - static void do_init(const std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { bessel_k0(T(0.5)); bessel_k0(T(1.5)); } - static void do_init(const std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { bessel_k0(T(0.5)); bessel_k0(T(1.5)); } template - static void do_init(const U&){} - void force_instantiate()const{} + BOOST_MATH_GPU_ENABLED static void do_init(const U&){} + BOOST_MATH_GPU_ENABLED void force_instantiate()const{} }; - static const init initializer; - static void force_instantiate() + BOOST_MATH_STATIC const init initializer; + BOOST_MATH_GPU_ENABLED static void force_instantiate() { + #ifndef BOOST_MATH_HAS_GPU_SUPPORT initializer.force_instantiate(); + #endif } }; @@ -81,14 +87,14 @@ const typename bessel_k0_initializer::init bessel_k0_initializer template -T bessel_k0_imp(const T&, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_k0_imp(const T&, const boost::math::integral_constant&) { BOOST_MATH_ASSERT(0); return 0; } template -T bessel_k0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_k0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -97,14 +103,14 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : -2.358e-09 // Maximum Relative Change in Control Points : 9.552e-02 // Max Error found at float precision = Poly : 4.448220e-08 - static const T Y = 1.137250900268554688f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.137250900268554688f; + BOOST_MATH_STATIC const T P[] = { -1.372508979104259711e-01f, 2.622545986273687617e-01f, 5.047103728247919836e-03f }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.000000000000000000e+00f, -8.928694018000029415e-02f, @@ -117,7 +123,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : -1.343e-09 // Maximum Relative Change in Control Points : 2.405e-02 // Max Error found at float precision = Poly : 1.354814e-07 - static const T P2[] = { + BOOST_MATH_STATIC const T P2[] = { 1.159315158e-01f, 2.789828686e-01f, 2.524902861e-02f, @@ -133,14 +139,14 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Maximum Relative Change in Control Points : 9.064e-02 // Max Error found at float precision = Poly : 5.065020e-08 - static const T P[] = + BOOST_MATH_STATIC const T P[] = { 
2.533141220e-01f, 5.221502603e-01f, 6.380180669e-02f, -5.934976547e-02f }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.000000000e+00f, 2.679722431e+00f, @@ -158,7 +164,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) } template -T bessel_k0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_k0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -167,8 +173,8 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : -6.077e-17 // Maximum Relative Change in Control Points : 7.797e-02 // Max Error found at double precision = Poly : 1.003156e-16 - static const T Y = 1.137250900268554688; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.137250900268554688; + BOOST_MATH_STATIC const T P[] = { -1.372509002685546267e-01, 2.574916117833312855e-01, @@ -176,7 +182,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) 5.445476986653926759e-04, 7.125159422136622118e-06 }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.000000000000000000e+00, -5.458333438017788530e-02, @@ -191,7 +197,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : 3.392e-18 // Maximum Relative Change in Control Points : 2.041e-02 // Max Error found at double precision = Poly : 2.513112e-16 - static const T P2[] = + BOOST_MATH_STATIC const T P2[] = { 1.159315156584124484e-01, 2.789828789146031732e-01, @@ -212,8 +218,8 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Maximum Relative Change in Control Points : 2.757e-01 // Max Error found at double precision = Poly : 1.001560e-16 - static const T Y = 1; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1; + BOOST_MATH_STATIC const T P[] = { 2.533141373155002416e-01, 3.628342133984595192e+00, @@ -225,7 +231,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) -1.414237994269995877e+00, -9.369168119754924625e-02 }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.000000000000000000e+00, 1.494194694879908328e+01, @@ -248,7 +254,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) } template -T bessel_k0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_k0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -257,8 +263,8 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : 2.180e-22 // Maximum Relative Change in Control Points : 2.943e-01 // Max Error found at float80 precision = Poly : 3.923207e-20 - static const T Y = 1.137250900268554687500e+00; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.137250900268554687500e+00; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -1.372509002685546875002e-01), BOOST_MATH_BIG_CONSTANT(T, 64, 2.566481981037407600436e-01), @@ -267,7 +273,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) BOOST_MATH_BIG_CONSTANT(T, 64, 1.213747930378196492543e-05), BOOST_MATH_BIG_CONSTANT(T, 64, 9.423709328020389560844e-08) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 64, -4.843828412587773008342e-02), @@ -284,7 +290,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : -2.434e-21 // Maximum Relative Change in Control Points : 2.459e-02 // Max Error found at float80 precision = Poly : 1.482487e-19 - static const T P2[] = + BOOST_MATH_STATIC 
const T P2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.159315156584124488110e-01), BOOST_MATH_BIG_CONSTANT(T, 64, 2.764832791416047889734e-01), @@ -292,7 +298,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) BOOST_MATH_BIG_CONSTANT(T, 64, 3.660777862036966089410e-04), BOOST_MATH_BIG_CONSTANT(T, 64, 2.094942446930673386849e-06) }; - static const T Q2[] = + BOOST_MATH_STATIC const T Q2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 64, -2.156100313881251616320e-02), @@ -308,8 +314,8 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : 2.236e-21 // Maximum Relative Change in Control Points : 3.021e-01 //Max Error found at float80 precision = Poly : 8.727378e-20 - static const T Y = 1; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 2.533141373155002512056e-01), BOOST_MATH_BIG_CONSTANT(T, 64, 5.417942070721928652715e+00), @@ -323,7 +329,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) BOOST_MATH_BIG_CONSTANT(T, 64, -4.059789241612946683713e+00), BOOST_MATH_BIG_CONSTANT(T, 64, -1.612783121537333908889e-01) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 64, 2.200669254769325861404e+01), @@ -348,7 +354,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) } template -T bessel_k0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_k0_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -357,8 +363,8 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : 5.682e-37 // Maximum Relative Change in Control Points : 6.094e-04 // Max Error found at float128 precision = Poly : 5.338213e-35 - static const T Y = 1.137250900268554687500000000000000000e+00f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.137250900268554687500000000000000000e+00f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -1.372509002685546875000000000000000006e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 2.556212905071072782462974351698081303e-01), @@ -369,7 +375,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) BOOST_MATH_BIG_CONSTANT(T, 113, 1.752489221949580551692915881999762125e-09), BOOST_MATH_BIG_CONSTANT(T, 113, 5.243010555737173524710512824955368526e-12) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.000000000000000000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 113, -4.095631064064621099785696980653193721e-02), @@ -387,7 +393,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : 5.105e-38 // Maximum Relative Change in Control Points : 9.734e-03 // Max Error found at float128 precision = Poly : 1.688806e-34 - static const T P2[] = + BOOST_MATH_STATIC const T P2[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.159315156584124488107200313757741370e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 2.789828789146031122026800078439435369e-01), @@ -413,8 +419,8 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) // Expected Error Term : 4.917e-40 // Maximum Relative Change in Control Points : 3.385e-01 // Max Error found at float128 precision = Poly : 1.567573e-34 - static const T Y = 1; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 
2.533141373155002512078826424055226265e-01), BOOST_MATH_BIG_CONSTANT(T, 113, 2.001949740768235770078339977110749204e+01), @@ -439,7 +445,7 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) BOOST_MATH_BIG_CONSTANT(T, 113, -4.201632288615609937883545928660649813e+03), BOOST_MATH_BIG_CONSTANT(T, 113, -3.690820607338480548346746717311811406e+01) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.000000000000000000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 113, 7.964877874035741452203497983642653107e+01), @@ -475,33 +481,33 @@ T bessel_k0_imp(const T& x, const std::integral_constant&) } template -T bessel_k0_imp(const T& x, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T bessel_k0_imp(const T& x, const boost::math::integral_constant&) { if(boost::math::tools::digits() <= 24) - return bessel_k0_imp(x, std::integral_constant()); + return bessel_k0_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 53) - return bessel_k0_imp(x, std::integral_constant()); + return bessel_k0_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 64) - return bessel_k0_imp(x, std::integral_constant()); + return bessel_k0_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 113) - return bessel_k0_imp(x, std::integral_constant()); + return bessel_k0_imp(x, boost::math::integral_constant()); BOOST_MATH_ASSERT(0); return 0; } template -inline T bessel_k0(const T& x) +BOOST_MATH_GPU_ENABLED inline T bessel_k0(const T& x) { - typedef std::integral_constant::digits == 0) || (std::numeric_limits::radix != 2)) ? + typedef boost::math::integral_constant::digits == 0) || (boost::math::numeric_limits::radix != 2)) ? 0 : - std::numeric_limits::digits <= 24 ? + boost::math::numeric_limits::digits <= 24 ? 24 : - std::numeric_limits::digits <= 53 ? + boost::math::numeric_limits::digits <= 53 ? 53 : - std::numeric_limits::digits <= 64 ? + boost::math::numeric_limits::digits <= 64 ? 64 : - std::numeric_limits::digits <= 113 ? + boost::math::numeric_limits::digits <= 113 ? 
113 : -1 > tag_type; diff --git a/include/boost/math/special_functions/detail/bessel_k1.hpp b/include/boost/math/special_functions/detail/bessel_k1.hpp index bd37f90215..49846dc8c5 100644 --- a/include/boost/math/special_functions/detail/bessel_k1.hpp +++ b/include/boost/math/special_functions/detail/bessel_k1.hpp @@ -13,6 +13,10 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) #endif +#include +#include +#include +#include #include #include #include @@ -44,36 +48,38 @@ namespace boost { namespace math { namespace detail{ template - T bessel_k1(const T&); + BOOST_MATH_GPU_ENABLED T bessel_k1(const T&); template struct bessel_k1_initializer { struct init { - init() + BOOST_MATH_GPU_ENABLED init() { do_init(tag()); } - static void do_init(const std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { bessel_k1(T(0.5)); bessel_k1(T(2)); bessel_k1(T(6)); } - static void do_init(const std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { bessel_k1(T(0.5)); bessel_k1(T(6)); } template - static void do_init(const U&) {} - void force_instantiate()const {} + BOOST_MATH_GPU_ENABLED static void do_init(const U&) {} + BOOST_MATH_GPU_ENABLED void force_instantiate()const {} }; - static const init initializer; - static void force_instantiate() + BOOST_MATH_STATIC const init initializer; + BOOST_MATH_GPU_ENABLED static void force_instantiate() { + #ifndef BOOST_MATH_HAS_GPU_SUPPORT initializer.force_instantiate(); + #endif } }; @@ -82,14 +88,14 @@ namespace boost { namespace math { namespace detail{ template - inline T bessel_k1_imp(const T&, const std::integral_constant&) + inline BOOST_MATH_GPU_ENABLED T bessel_k1_imp(const T&, const boost::math::integral_constant&) { BOOST_MATH_ASSERT(0); return 0; } template - T bessel_k1_imp(const T& x, const std::integral_constant&) + BOOST_MATH_GPU_ENABLED T bessel_k1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -98,14 +104,14 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : -3.053e-12 // Maximum Relative Change in Control Points : 4.927e-02 // Max Error found at float precision = Poly : 7.918347e-10 - static const T Y = 8.695471287e-02f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 8.695471287e-02f; + BOOST_MATH_STATIC const T P[] = { -3.621379531e-03f, 7.131781976e-03f, -1.535278300e-05f }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.000000000e+00f, -5.173102701e-02f, @@ -118,7 +124,7 @@ namespace boost { namespace math { namespace detail{ // Maximum Deviation Found: 3.556e-08 // Expected Error Term : -3.541e-08 // Maximum Relative Change in Control Points : 8.203e-02 - static const T P2[] = + BOOST_MATH_STATIC const T P2[] = { -3.079657469e-01f, -8.537108913e-02f, @@ -134,15 +140,15 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : -3.227e-08 // Maximum Relative Change in Control Points : 9.917e-02 // Max Error found at float precision = Poly : 6.084411e-08 - static const T Y = 1.450342178f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.450342178f; + BOOST_MATH_STATIC const T P[] = { -1.970280088e-01f, 2.188747807e-02f, 7.270394756e-01f, 2.490678196e-01f }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.000000000e+00f, 2.274292882e+00f, @@ -160,7 +166,7 @@ namespace boost { namespace math { namespace detail{ } template - T bessel_k1_imp(const T& x, const 
std::integral_constant&) + BOOST_MATH_GPU_ENABLED T bessel_k1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -169,15 +175,15 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : 1.921e-17 // Maximum Relative Change in Control Points : 5.287e-03 // Max Error found at double precision = Poly : 2.004747e-17 - static const T Y = 8.69547128677368164e-02f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 8.69547128677368164e-02f; + BOOST_MATH_STATIC const T P[] = { -3.62137953440350228e-03, 7.11842087490330300e-03, 1.00302560256614306e-05, 1.77231085381040811e-06 }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.00000000000000000e+00, -4.80414794429043831e-02, @@ -193,14 +199,14 @@ namespace boost { namespace math { namespace detail{ // Maximum Relative Change in Control Points : 3.103e-04 // Max Error found at double precision = Poly : 1.246698e-16 - static const T P2[] = + BOOST_MATH_STATIC const T P2[] = { -3.07965757829206184e-01, -7.80929703673074907e-02, -2.70619343754051620e-03, -2.49549522229072008e-05 }; - static const T Q2[] = + BOOST_MATH_STATIC const T Q2[] = { 1.00000000000000000e+00, -2.36316836412163098e-02, @@ -217,8 +223,8 @@ namespace boost { namespace math { namespace detail{ // Maximum Relative Change in Control Points : 2.786e-01 // Max Error found at double precision = Poly : 1.258798e-16 - static const T Y = 1.45034217834472656f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.45034217834472656f; + BOOST_MATH_STATIC const T P[] = { -1.97028041029226295e-01, -2.32408961548087617e+00, @@ -230,7 +236,7 @@ namespace boost { namespace math { namespace detail{ 6.62582288933739787e+00, 3.08851840645286691e-01 }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { 1.00000000000000000e+00, 1.41811409298826118e+01, @@ -253,7 +259,7 @@ namespace boost { namespace math { namespace detail{ } template - T bessel_k1_imp(const T& x, const std::integral_constant&) + BOOST_MATH_GPU_ENABLED T bessel_k1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -262,8 +268,8 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : -5.548e-23 // Maximum Relative Change in Control Points : 2.002e-03 // Max Error found at float80 precision = Poly : 9.352785e-22 - static const T Y = 8.695471286773681640625e-02f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 8.695471286773681640625e-02f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -3.621379534403483072861e-03), BOOST_MATH_BIG_CONSTANT(T, 64, 7.102135866103952705932e-03), @@ -271,7 +277,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 64, 2.537484002571894870830e-06), BOOST_MATH_BIG_CONSTANT(T, 64, 6.603228256820000135990e-09) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 64, -4.354457194045068370363e-02), @@ -287,7 +293,7 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : 1.995e-23 // Maximum Relative Change in Control Points : 8.174e-04 // Max Error found at float80 precision = Poly : 4.137325e-20 - static const T P2[] = + BOOST_MATH_STATIC const T P2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -3.079657578292062244054e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -7.963049154965966503231e-02), @@ -295,7 +301,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 64, 
-4.023052834702215699504e-05), BOOST_MATH_BIG_CONSTANT(T, 64, -1.719459155018493821839e-07) }; - static const T Q2[] = + BOOST_MATH_STATIC const T Q2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 64, -1.863917670410152669768e-02), @@ -312,8 +318,8 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : -3.302e-21 // Maximum Relative Change in Control Points : 3.432e-01 // Max Error found at float80 precision = Poly : 1.083755e-19 - static const T Y = 1.450342178344726562500e+00f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.450342178344726562500e+00f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -1.970280410292263112917e-01), BOOST_MATH_BIG_CONSTANT(T, 64, -4.058564803062959169322e+00), @@ -328,7 +334,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 64, 4.319614662598089438939e+00), BOOST_MATH_BIG_CONSTANT(T, 64, 3.710715864316521856193e-02) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 64, 2.298433045824439052398e+01), @@ -353,7 +359,7 @@ namespace boost { namespace math { namespace detail{ } template - T bessel_k1_imp(const T& x, const std::integral_constant&) + BOOST_MATH_GPU_ENABLED T bessel_k1_imp(const T& x, const boost::math::integral_constant&) { BOOST_MATH_STD_USING if(x <= 1) @@ -362,8 +368,8 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : -7.119e-35 // Maximum Relative Change in Control Points : 1.207e-03 // Max Error found at float128 precision = Poly : 7.143688e-35 - static const T Y = 8.695471286773681640625000000000000000e-02f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 8.695471286773681640625000000000000000e-02f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -3.621379534403483072916666666666595475e-03), BOOST_MATH_BIG_CONSTANT(T, 113, 7.074117676930975433219826471336547627e-03), @@ -373,7 +379,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 113, 2.347140307321161346703214099534250263e-10), BOOST_MATH_BIG_CONSTANT(T, 113, 5.569608494081482873946791086435679661e-13) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.000000000000000000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 113, -3.580768910152105375615558920428350204e-02), @@ -391,7 +397,7 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : 4.473e-37 // Maximum Relative Change in Control Points : 8.550e-04 // Max Error found at float128 precision = Poly : 8.167701e-35 - static const T P2[] = + BOOST_MATH_STATIC const T P2[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -3.079657578292062244053600156878870690e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -8.133183745732467770755578848987414875e-02), @@ -401,7 +407,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 113, -1.632502325880313239698965376754406011e-09), BOOST_MATH_BIG_CONSTANT(T, 113, -2.311973065898784812266544485665624227e-12) }; - static const T Q2[] = + BOOST_MATH_STATIC const T Q2[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.000000000000000000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 113, -1.311471216733781016657962995723287450e-02), @@ -418,8 +424,8 @@ namespace boost { namespace math { namespace detail{ { // Max error in interpolated form: 5.307e-37 // Max Error found at float128 precision = Poly: 7.087862e-35 
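The P/Q and P2/Q2 tables above and below are numerator/denominator coefficients of rational minimax approximations; the implementation combines them along the lines of `Y + evaluate_polynomial(P, t) / evaluate_polynomial(Q, t)`. For reference, a minimal Horner-style evaluator in the spirit of `boost::math::tools::evaluate_polynomial` — illustrative only, not the library's code, which also carries unrolled variants:

```cpp
#include <cstddef>
#include <iostream>

// Horner evaluation: c[0] + c[1]*t + ... + c[N-1]*t^(N-1), matching the
// ascending coefficient order of the P/Q tables in this diff.
template <class T, std::size_t N>
T evaluate_polynomial(const T (&c)[N], T t)
{
    T result = c[N - 1];
    for (std::size_t i = N - 1; i > 0; --i)
        result = result * t + c[i - 1];
    return result;
}

int main()
{
    static const double P[] = { 1.0, 2.0, 3.0 }; // 1 + 2t + 3t^2
    static const double Q[] = { 1.0, 0.5 };      // 1 + t/2
    const double t = 0.25;
    std::cout << evaluate_polynomial(P, t) / evaluate_polynomial(Q, t) << '\n'; // 1.5
}
```

A fixed-trip-count loop over `BOOST_MATH_STATIC` tables like this needs no dynamic initialisation or host-only machinery, which is why these approximations port to device code so directly.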
- static const T Y = 1.5023040771484375f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.5023040771484375f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -2.489899398329369710528254347931380044e-01), BOOST_MATH_BIG_CONSTANT(T, 113, -6.819080211203854781858815596508456873e+00), @@ -438,7 +444,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 113, 1.039705646510167437971862966128055524e+00), BOOST_MATH_BIG_CONSTANT(T, 113, 1.008418100718254816100425022904039530e-02) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.000000000000000000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 113, 2.927456835239137986889227412815459529e+01), @@ -465,8 +471,8 @@ namespace boost { namespace math { namespace detail{ // Expected Error Term : -6.565e-40 // Maximum Relative Change in Control Points : 1.880e-01 // Max Error found at float128 precision = Poly : 2.943572e-35 - static const T Y = 1.308816909790039062500000000000000000f; - static const T P[] = + BOOST_MATH_STATIC const T Y = 1.308816909790039062500000000000000000f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 113, -5.550277247453881129211735759447737350e-02), BOOST_MATH_BIG_CONSTANT(T, 113, -3.485883080219574328217554864956175929e+00), @@ -486,7 +492,7 @@ namespace boost { namespace math { namespace detail{ BOOST_MATH_BIG_CONSTANT(T, 113, 8.981057433937398731355768088809437625e+05), BOOST_MATH_BIG_CONSTANT(T, 113, 2.519440069856232098711793483639792952e+04) }; - static const T Q[] = + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 113, 1.000000000000000000000000000000000000e+00), BOOST_MATH_BIG_CONSTANT(T, 113, 7.127348248283623146544565916604103560e+01), @@ -517,33 +523,33 @@ namespace boost { namespace math { namespace detail{ } template - T bessel_k1_imp(const T& x, const std::integral_constant&) + BOOST_MATH_GPU_ENABLED T bessel_k1_imp(const T& x, const boost::math::integral_constant&) { if(boost::math::tools::digits() <= 24) - return bessel_k1_imp(x, std::integral_constant()); + return bessel_k1_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 53) - return bessel_k1_imp(x, std::integral_constant()); + return bessel_k1_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 64) - return bessel_k1_imp(x, std::integral_constant()); + return bessel_k1_imp(x, boost::math::integral_constant()); else if(boost::math::tools::digits() <= 113) - return bessel_k1_imp(x, std::integral_constant()); + return bessel_k1_imp(x, boost::math::integral_constant()); BOOST_MATH_ASSERT(0); return 0; } - template - inline T bessel_k1(const T& x) + template + inline BOOST_MATH_GPU_ENABLED T bessel_k1(const T& x) { - typedef std::integral_constant::digits == 0) || (std::numeric_limits::radix != 2)) ? + typedef boost::math::integral_constant::digits == 0) || (boost::math::numeric_limits::radix != 2)) ? 0 : - std::numeric_limits::digits <= 24 ? + boost::math::numeric_limits::digits <= 24 ? 24 : - std::numeric_limits::digits <= 53 ? + boost::math::numeric_limits::digits <= 53 ? 53 : - std::numeric_limits::digits <= 64 ? + boost::math::numeric_limits::digits <= 64 ? 64 : - std::numeric_limits::digits <= 113 ? + boost::math::numeric_limits::digits <= 113 ? 
113 : -1 > tag_type; diff --git a/include/boost/math/special_functions/detail/bessel_kn.hpp b/include/boost/math/special_functions/detail/bessel_kn.hpp index d0ddcd0db4..41becc8aa9 100644 --- a/include/boost/math/special_functions/detail/bessel_kn.hpp +++ b/include/boost/math/special_functions/detail/bessel_kn.hpp @@ -10,8 +10,12 @@ #pragma once #endif +#include +#include +#include #include #include +#include #include // Modified Bessel function of the second kind of integer order @@ -20,14 +24,14 @@ namespace boost { namespace math { namespace detail{ template -T bessel_kn(int n, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T bessel_kn(int n, T x, const Policy& pol) { BOOST_MATH_STD_USING T value, current, prev; using namespace boost::math::tools; - static const char* function = "boost::math::bessel_kn<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::bessel_kn<%1%>(%1%,%1%)"; if (x < 0) { diff --git a/include/boost/math/special_functions/detail/bessel_y0.hpp b/include/boost/math/special_functions/detail/bessel_y0.hpp index 1679820d19..f1aea6acbd 100644 --- a/include/boost/math/special_functions/detail/bessel_y0.hpp +++ b/include/boost/math/special_functions/detail/bessel_y0.hpp @@ -12,6 +12,7 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) #endif +#include #include #include #include @@ -36,12 +37,12 @@ namespace boost { namespace math { namespace detail{ template -T bessel_y0(T x, const Policy&); +BOOST_MATH_GPU_ENABLED T bessel_y0(T x, const Policy&); template -T bessel_y0(T x, const Policy&) +BOOST_MATH_GPU_ENABLED T bessel_y0(T x, const Policy&) { - static const T P1[] = { + BOOST_MATH_STATIC const T P1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0723538782003176831e+11)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.3716255451260504098e+09)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.0422274357376619816e+08)), @@ -49,7 +50,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0102532948020907590e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.8402381979244993524e+01)), }; - static const T Q1[] = { + BOOST_MATH_STATIC const T Q1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.8873865738997033405e+11)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.1617187777290363573e+09)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.5662956624278251596e+07)), @@ -57,7 +58,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.6475986689240190091e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T P2[] = { + BOOST_MATH_STATIC const T P2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -2.2213976967566192242e+13)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -5.5107435206722644429e+11)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.3600098638603061642e+10)), @@ -66,7 +67,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.4566865832663635920e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.7427031242901594547e+01)), }; - static const T Q2[] = { + BOOST_MATH_STATIC const T Q2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.3386146580707264428e+14)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.4266824419412347550e+12)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.4015103849971240096e+10)), @@ -75,7 +76,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.3030857612070288823e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T P3[] = { + BOOST_MATH_STATIC const T P3[] = { 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.0728726905150210443e+15)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.7016641869173237784e+14)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.2829912364088687306e+11)), @@ -85,7 +86,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1363534169313901632e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.7439661319197499338e+01)), }; - static const T Q3[] = { + BOOST_MATH_STATIC const T Q3[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.4563724628846457519e+17)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.9272425569640309819e+15)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.2598377924042897629e+13)), @@ -95,7 +96,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.7903362168128450017e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T PC[] = { + BOOST_MATH_STATIC const T PC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.2779090197304684302e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.1345386639580765797e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1170523380864944322e+04)), @@ -103,7 +104,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.5376201909008354296e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.8961548424210455236e-01)), }; - static const T QC[] = { + BOOST_MATH_STATIC const T QC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.2779090197304684318e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.1370412495510416640e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1215350561880115730e+04)), @@ -111,7 +112,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.5711159858080893649e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T PS[] = { + BOOST_MATH_STATIC const T PS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.9226600200800094098e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.8591953644342993800e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.1183429920482737611e+02)), @@ -119,7 +120,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.2441026745835638459e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -8.8033303048680751817e-03)), }; - static const T QS[] = { + BOOST_MATH_STATIC const T QS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.7105024128512061905e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.1951131543434613647e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.2642780169211018836e+03)), @@ -127,7 +128,7 @@ T bessel_y0(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 9.0593769594993125859e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.9357696627916752158e-01)), + BOOST_MATH_STATIC const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.9357696627916752158e-01)), x2 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.9576784193148578684e+00)), x3 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.0860510603017726976e+00)), x11 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.280e+02)), diff --git a/include/boost/math/special_functions/detail/bessel_y1.hpp b/include/boost/math/special_functions/detail/bessel_y1.hpp index 3ac696bb5c..0f0dbdf3bb 100644 --- a/include/boost/math/special_functions/detail/bessel_y1.hpp +++ b/include/boost/math/special_functions/detail/bessel_y1.hpp @@ -12,6 +12,7 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) 
#endif +#include #include #include #include @@ -36,12 +37,12 @@ namespace boost { namespace math { namespace detail{ template -T bessel_y1(T x, const Policy&); +BOOST_MATH_GPU_ENABLED T bessel_y1(T x, const Policy&); template -T bessel_y1(T x, const Policy&) +BOOST_MATH_GPU_ENABLED T bessel_y1(T x, const Policy&) { - static const T P1[] = { + BOOST_MATH_STATIC const T P1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.0535726612579544093e+13)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.4708611716525426053e+12)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.7595974497819597599e+11)), @@ -50,7 +51,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.2157953222280260820e+05)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -3.1714424660046133456e+02)), }; - static const T Q1[] = { + BOOST_MATH_STATIC const T Q1[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.0737873921079286084e+14)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.1272286200406461981e+12)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.7800352738690585613e+10)), @@ -59,7 +60,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.2079908168393867438e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T P2[] = { + BOOST_MATH_STATIC const T P2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.1514276357909013326e+19)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -5.6808094574724204577e+18)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -2.3638408497043134724e+16)), @@ -70,7 +71,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.9153806858264202986e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.2337180442012953128e+03)), }; - static const T Q2[] = { + BOOST_MATH_STATIC const T Q2[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.3321844313316185697e+20)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.6968198822857178911e+18)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.0837179548112881950e+16)), @@ -81,7 +82,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.2855164849321609336e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T PC[] = { + BOOST_MATH_STATIC const T PC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -4.4357578167941278571e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -9.9422465050776411957e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -6.6033732483649391093e+06)), @@ -90,7 +91,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.6116166443246101165e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.0)), }; - static const T QC[] = { + BOOST_MATH_STATIC const T QC[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -4.4357578167941278568e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -9.9341243899345856590e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -6.5853394797230870728e+06)), @@ -99,7 +100,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -1.4550094401904961825e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T PS[] = { + BOOST_MATH_STATIC const T PS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.3220913409857223519e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.5145160675335701966e+04)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 6.6178836581270835179e+04)), @@ -108,7 +109,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3.5265133846636032186e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.0)), }; 
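The `x1`/`x2`/`x11`/`x12` constants that close out these coefficient tables (just below) store each zero of the Bessel function in split form: `x11/256` is exactly representable in binary, and `x12` carries the residual, so the code can form `(x - x11/256) - x12` without the cancellation a direct `x - x1` could suffer near the zero. A small self-check of that decomposition, using the Y1 values quoted in this hunk (not library code):

```cpp
#include <cstdio>

int main()
{
    const double x1  = 2.1971413260310170351;     // first positive zero of Y1
    const double x11 = 5.620e+02;                 // 562/256 = 2.1953125, exact in binary
    const double x12 = 1.8288260310170351490e-03; // residual: x1 - 562/256

    // The split reconstructs the zero: x11/256 + x12 == x1 (to rounding),
    // and the subtraction x - x11/256 is exact for x near the zero.
    std::printf("%.17g\n", (x11 / 256 + x12) - x1); // ~0
}
```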
- static const T QS[] = { + BOOST_MATH_STATIC const T QS[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 7.0871281941028743574e+05)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.8194580422439972989e+06)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.4194606696037208929e+06)), @@ -117,7 +118,7 @@ T bessel_y1(T x, const Policy&) static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 8.6383677696049909675e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.0)), }; - static const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1971413260310170351e+00)), + BOOST_MATH_STATIC const T x1 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.1971413260310170351e+00)), x2 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.4296810407941351328e+00)), x11 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 5.620e+02)), x12 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 1.8288260310170351490e-03)), diff --git a/include/boost/math/special_functions/detail/bessel_yn.hpp b/include/boost/math/special_functions/detail/bessel_yn.hpp index 73dee0bbb8..a45d1761cd 100644 --- a/include/boost/math/special_functions/detail/bessel_yn.hpp +++ b/include/boost/math/special_functions/detail/bessel_yn.hpp @@ -10,9 +10,11 @@ #pragma once #endif +#include #include #include #include +#include #include // Bessel function of the second kind of integer order @@ -21,14 +23,14 @@ namespace boost { namespace math { namespace detail{ template -T bessel_yn(int n, T x, const Policy& pol) +BOOST_MATH_GPU_ENABLED T bessel_yn(int n, T x, const Policy& pol) { BOOST_MATH_STD_USING T value, factor, current, prev; using namespace boost::math::tools; - static const char* function = "boost::math::bessel_yn<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::bessel_yn<%1%>(%1%,%1%)"; if ((x == 0) && (n == 0)) { diff --git a/include/boost/math/special_functions/detail/erf_inv.hpp b/include/boost/math/special_functions/detail/erf_inv.hpp index 0054a74266..cb65cffbc1 100644 --- a/include/boost/math/special_functions/detail/erf_inv.hpp +++ b/include/boost/math/special_functions/detail/erf_inv.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -13,6 +14,10 @@ #pragma warning(disable:4702) // Unreachable code: optimization warning #endif +#include + +#ifndef BOOST_MATH_HAS_NVRTC + #include namespace boost{ namespace math{ @@ -23,7 +28,7 @@ namespace detail{ // this version is for 80-bit long double's and smaller: // template -T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constant&) { BOOST_MATH_STD_USING // for ADL of std names. 
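Alongside the GPU annotations, the `erf_inv_imp` overloads here change their dispatch parameter from a tag *pointer* (callers used to pass a `static_cast` null pointer) to a tag passed by const reference and constructed as a plain `tag_type()` at the call site. Both styles resolve at compile time; a minimal contrast with an illustrative tag (the real tags encode the precision ladder, e.g. 24/53/64/113 bits):

```cpp
#include <iostream>
#include <type_traits>

using tag53 = std::integral_constant<int, 53>;

int imp(const tag53*) { return 0; } // old style: dispatch on a null tag pointer
int imp(const tag53&) { return 1; } // new style: dispatch on a stateless tag object

int main()
{
    std::cout << imp(static_cast<const tag53*>(nullptr)) // picks the pointer overload
              << imp(tag53())                            // picks the reference overload
              << '\n';                                   // prints 01
}
```

Passing the empty tag object avoids the null-pointer cast entirely, which reads more cleanly and costs nothing, since the tag has no state to copy.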
@@ -44,8 +49,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan // Maximum Deviation Found (actual error term at infinite precision) 8.030e-21 // // LCOV_EXCL_START - static const float Y = 0.0891314744949340820313f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.0891314744949340820313f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.000508781949658280665617), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00836874819741736770379), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0334806625409744615033), @@ -55,7 +60,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, 0.00822687874676915743155), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00538772965071242932965) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, -0.970005043303290640362), BOOST_MATH_BIG_CONSTANT(T, 64, -1.56574558234175846809), @@ -87,8 +92,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan // Maximum Deviation Found (error term) 4.811e-20 // // LCOV_EXCL_START - static const float Y = 2.249481201171875f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 2.249481201171875f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.202433508355938759655), BOOST_MATH_BIG_CONSTANT(T, 64, 0.105264680699391713268), BOOST_MATH_BIG_CONSTANT(T, 64, 8.37050328343119927838), @@ -99,7 +104,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, 21.1294655448340526258), BOOST_MATH_BIG_CONSTANT(T, 64, -3.67192254707729348546) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 6.24264124854247537712), BOOST_MATH_BIG_CONSTANT(T, 64, 3.9713437953343869095), @@ -142,8 +147,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan { // LCOV_EXCL_START // Max error found: 1.089051e-20 - static const float Y = 0.807220458984375f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.807220458984375f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.131102781679951906451), BOOST_MATH_BIG_CONSTANT(T, 64, -0.163794047193317060787), BOOST_MATH_BIG_CONSTANT(T, 64, 0.117030156341995252019), @@ -156,7 +161,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, 0.285225331782217055858e-7), BOOST_MATH_BIG_CONSTANT(T, 64, -0.681149956853776992068e-9) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 3.46625407242567245975), BOOST_MATH_BIG_CONSTANT(T, 64, 5.38168345707006855425), @@ -175,8 +180,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan { // LCOV_EXCL_START // Max error found: 8.389174e-21 - static const float Y = 0.93995571136474609375f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.93995571136474609375f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.0350353787183177984712), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00222426529213447927281), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0185573306514231072324), @@ -187,7 +192,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, 
-0.230404776911882601748e-9), BOOST_MATH_BIG_CONSTANT(T, 64, 0.266339227425782031962e-11) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 1.3653349817554063097), BOOST_MATH_BIG_CONSTANT(T, 64, 0.762059164553623404043), @@ -205,8 +210,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan { // LCOV_EXCL_START // Max error found: 1.481312e-19 - static const float Y = 0.98362827301025390625f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.98362827301025390625f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.0167431005076633737133), BOOST_MATH_BIG_CONSTANT(T, 64, -0.00112951438745580278863), BOOST_MATH_BIG_CONSTANT(T, 64, 0.00105628862152492910091), @@ -217,7 +222,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, -0.281128735628831791805e-13), BOOST_MATH_BIG_CONSTANT(T, 64, 0.99055709973310326855e-16) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 0.591429344886417493481), BOOST_MATH_BIG_CONSTANT(T, 64, 0.138151865749083321638), @@ -235,8 +240,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan { // LCOV_EXCL_START // Max error found: 5.697761e-20 - static const float Y = 0.99714565277099609375f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.99714565277099609375f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.0024978212791898131227), BOOST_MATH_BIG_CONSTANT(T, 64, -0.779190719229053954292e-5), BOOST_MATH_BIG_CONSTANT(T, 64, 0.254723037413027451751e-4), @@ -246,7 +251,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, 0.145596286718675035587e-11), BOOST_MATH_BIG_CONSTANT(T, 64, -0.116765012397184275695e-17) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 0.207123112214422517181), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0169410838120975906478), @@ -264,8 +269,8 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan { // LCOV_EXCL_START // Max error found: 1.279746e-20 - static const float Y = 0.99941349029541015625f; - static const T P[] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const float Y = 0.99941349029541015625f; + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.000539042911019078575891), BOOST_MATH_BIG_CONSTANT(T, 64, -0.28398759004727721098e-6), BOOST_MATH_BIG_CONSTANT(T, 64, 0.899465114892291446442e-6), @@ -275,7 +280,7 @@ T erf_inv_imp(const T& p, const T& q, const Policy&, const std::integral_constan BOOST_MATH_BIG_CONSTANT(T, 64, 0.135880130108924861008e-14), BOOST_MATH_BIG_CONSTANT(T, 64, -0.348890393399948882918e-21) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0845746234001899436914), BOOST_MATH_BIG_CONSTANT(T, 64, 0.00282092984726264681981), @@ -310,12 +315,13 @@ struct erf_roots }; template -T erf_inv_imp(const T& p, const T& q, const Policy& pol, const std::integral_constant*) +T erf_inv_imp(const T& p, const T& q, const Policy& pol, const std::integral_constant&) { // // Generic version, get a guess that's accurate to 64-bits (10^-19) // - T guess = erf_inv_imp(p, q, pol, static_cast 
const*>(nullptr)); + using tag_type = std::integral_constant; + T guess = erf_inv_imp(p, q, pol, tag_type()); T result; // // If T has more bit's than 64 in it's mantissa then we need to iterate, @@ -344,14 +350,14 @@ T erf_inv_imp(const T& p, const T& q, const Policy& pol, const std::integral_con } // namespace detail template -typename tools::promote_args::type erfc_inv(T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type erfc_inv(T z, const Policy& pol) { typedef typename tools::promote_args::type result_type; // // Begin by testing for domain errors, and other special cases: // - static const char* function = "boost::math::erfc_inv<%1%>(%1%, %1%)"; + constexpr auto function = "boost::math::erfc_inv<%1%>(%1%, %1%)"; if((z < 0) || (z > 2)) return policies::raise_domain_error(function, "Argument outside range [0,2] in inverse erfc function (got p=%1%).", z, pol); if(z == 0) @@ -401,18 +407,18 @@ typename tools::promote_args::type erfc_inv(T z, const Policy& pol) // And get the result, negating where required: // return s * policies::checked_narrowing_cast( - detail::erf_inv_imp(static_cast(p), static_cast(q), forwarding_policy(), static_cast(nullptr)), function); + detail::erf_inv_imp(static_cast(p), static_cast(q), forwarding_policy(), tag_type()), function); } template -typename tools::promote_args::type erf_inv(T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type erf_inv(T z, const Policy& pol) { typedef typename tools::promote_args::type result_type; // // Begin by testing for domain errors, and other special cases: // - static const char* function = "boost::math::erf_inv<%1%>(%1%, %1%)"; + constexpr auto function = "boost::math::erf_inv<%1%>(%1%, %1%)"; if((z < -1) || (z > 1)) return policies::raise_domain_error(function, "Argument outside range [-1, 1] in inverse erf function (got p=%1%).", z, pol); if(z == 1) @@ -469,17 +475,17 @@ typename tools::promote_args::type erf_inv(T z, const Policy& pol) // And get the result, negating where required: // return s * policies::checked_narrowing_cast( - detail::erf_inv_imp(static_cast(p), static_cast(q), forwarding_policy(), static_cast(nullptr)), function); + detail::erf_inv_imp(static_cast(p), static_cast(q), forwarding_policy(), tag_type()), function); } template -inline typename tools::promote_args::type erfc_inv(T z) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type erfc_inv(T z) { return erfc_inv(z, policies::policy<>()); } template -inline typename tools::promote_args::type erf_inv(T z) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type erf_inv(T z) { return erf_inv(z, policies::policy<>()); } @@ -487,6 +493,64 @@ inline typename tools::promote_args::type erf_inv(T z) } // namespace math } // namespace boost +#else // Special handling for NVRTC + +namespace boost { +namespace math { + +template +BOOST_MATH_GPU_ENABLED auto erf_inv(T x) +{ + return ::erfinv(x); +} + +template <> +BOOST_MATH_GPU_ENABLED auto erf_inv(float x) +{ + return ::erfinvf(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erf_inv(T x, const Policy&) +{ + return ::erfinv(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erf_inv(float x, const Policy&) +{ + return ::erfinvf(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erfc_inv(T x) +{ + return ::erfcinv(x); +} + +template <> +BOOST_MATH_GPU_ENABLED auto erfc_inv(float x) +{ + return ::erfcinvf(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erfc_inv(T x, const Policy&) +{ + return ::erfcinv(x); +} + +template 
+BOOST_MATH_GPU_ENABLED auto erfc_inv(float x, const Policy&)
+{
+ return ::erfcinvf(x);
+}
+
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_HAS_NVRTC
+
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
diff --git a/include/boost/math/special_functions/detail/fp_traits.hpp b/include/boost/math/special_functions/detail/fp_traits.hpp
index 2947a32a21..015ea9cd35 100644
--- a/include/boost/math/special_functions/detail/fp_traits.hpp
+++ b/include/boost/math/special_functions/detail/fp_traits.hpp
@@ -4,6 +4,7 @@
 #define BOOST_MATH_FP_TRAITS_HPP
 // Copyright (c) 2006 Johan Rade
+// Copyright (c) 2024 Matt Borland
 // Distributed under the Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -24,6 +25,7 @@ With these techniques, the code could be simplified.
 #include
 #include
 #include
+#include
 #include
 #include
@@ -202,14 +204,14 @@ template<> struct fp_traits_non_native
 {
 typedef ieee_copy_all_bits_tag method;
- static constexpr uint32_t sign = 0x80000000u;
- static constexpr uint32_t exponent = 0x7f800000;
- static constexpr uint32_t flag = 0x00000000;
- static constexpr uint32_t significand = 0x007fffff;
+ BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u;
+ BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7f800000;
+ BOOST_MATH_STATIC constexpr uint32_t flag = 0x00000000;
+ BOOST_MATH_STATIC constexpr uint32_t significand = 0x007fffff;
 typedef uint32_t bits;
- static void get_bits(float x, uint32_t& a) { std::memcpy(&a, &x, 4); }
- static void set_bits(float& x, uint32_t a) { std::memcpy(&x, &a, 4); }
+ BOOST_MATH_GPU_ENABLED static void get_bits(float x, uint32_t& a) { std::memcpy(&a, &x, 4); }
+ BOOST_MATH_GPU_ENABLED static void set_bits(float& x, uint32_t a) { std::memcpy(&x, &a, 4); }
 };
 // ieee_tag version, double (64 bits) ----------------------------------------------
@@ -250,15 +252,15 @@ template<> struct fp_traits_non_native
 {
 typedef ieee_copy_all_bits_tag method;
- static constexpr uint64_t sign = static_cast(0x80000000u) << 32;
- static constexpr uint64_t exponent = static_cast(0x7ff00000) << 32;
- static constexpr uint64_t flag = 0;
- static constexpr uint64_t significand
+ BOOST_MATH_STATIC constexpr uint64_t sign = static_cast(0x80000000u) << 32;
+ BOOST_MATH_STATIC constexpr uint64_t exponent = static_cast(0x7ff00000) << 32;
+ BOOST_MATH_STATIC constexpr uint64_t flag = 0;
+ BOOST_MATH_STATIC constexpr uint64_t significand
 = (static_cast(0x000fffff) << 32) + static_cast(0xffffffffu);
 typedef uint64_t bits;
- static void get_bits(double x, uint64_t& a) { std::memcpy(&a, &x, 8); }
- static void set_bits(double& x, uint64_t a) { std::memcpy(&x, &a, 8); }
+ BOOST_MATH_GPU_ENABLED static void get_bits(double x, uint64_t& a) { std::memcpy(&a, &x, 8); }
+ BOOST_MATH_GPU_ENABLED static void set_bits(double& x, uint64_t a) { std::memcpy(&x, &a, 8); }
 };
 #endif
@@ -330,10 +332,10 @@ struct fp_traits_non_native
 {
 typedef ieee_copy_leading_bits_tag method;
- static constexpr uint32_t sign = 0x80000000u;
- static constexpr uint32_t exponent = 0x7fff0000;
- static constexpr uint32_t flag = 0x00008000;
- static constexpr uint32_t significand = 0x00007fff;
+ BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u;
+ BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7fff0000;
+ BOOST_MATH_STATIC constexpr uint32_t flag = 0x00008000;
+ BOOST_MATH_STATIC constexpr uint32_t significand = 0x00007fff;
 typedef uint32_t bits;
@@ -381,10 +383,10 @@ struct fp_traits_non_native
 {
 typedef ieee_copy_leading_bits_tag method;
- static constexpr uint32_t
sign = 0x80000000u; - static constexpr uint32_t exponent = 0x7ff00000; - static constexpr uint32_t flag = 0x00000000; - static constexpr uint32_t significand = 0x000fffff; + BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u; + BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7ff00000; + BOOST_MATH_STATIC constexpr uint32_t flag = 0x00000000; + BOOST_MATH_STATIC constexpr uint32_t significand = 0x000fffff; typedef uint32_t bits; @@ -399,7 +401,7 @@ struct fp_traits_non_native } private: - static constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12; + BOOST_MATH_STATIC constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12; }; @@ -419,10 +421,10 @@ struct fp_traits_non_native { typedef ieee_copy_leading_bits_tag method; - static constexpr uint32_t sign = 0x80000000u; - static constexpr uint32_t exponent = 0x7fff0000; - static constexpr uint32_t flag = 0x00008000; - static constexpr uint32_t significand = 0x00007fff; + BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u; + BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7fff0000; + BOOST_MATH_STATIC constexpr uint32_t flag = 0x00008000; + BOOST_MATH_STATIC constexpr uint32_t significand = 0x00007fff; // copy 1st, 2nd, 5th and 6th byte. 3rd and 4th byte are padding. @@ -455,10 +457,10 @@ struct fp_traits_non_native { typedef ieee_copy_leading_bits_tag method; - static constexpr uint32_t sign = 0x80000000u; - static constexpr uint32_t exponent = 0x7fff0000; - static constexpr uint32_t flag = 0x00000000; - static constexpr uint32_t significand = 0x0000ffff; + BOOST_MATH_STATIC constexpr uint32_t sign = 0x80000000u; + BOOST_MATH_STATIC constexpr uint32_t exponent = 0x7fff0000; + BOOST_MATH_STATIC constexpr uint32_t flag = 0x00000000; + BOOST_MATH_STATIC constexpr uint32_t significand = 0x0000ffff; typedef uint32_t bits; @@ -473,7 +475,7 @@ struct fp_traits_non_native } private: - static constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12; + BOOST_MATH_STATIC constexpr int offset_ = BOOST_MATH_ENDIAN_BIG_BYTE ? 0 : 12; }; #endif @@ -553,7 +555,8 @@ struct select_native && !defined(BOOST_MATH_DISABLE_STD_FPCLASSIFY)\ && !defined(__INTEL_COMPILER)\ && !defined(sun)\ - && !defined(__VXWORKS__) + && !defined(__VXWORKS__)\ + && !defined(BOOST_MATH_HAS_GPU_SUPPORT) # define BOOST_MATH_USE_STD_FPCLASSIFY #endif diff --git a/include/boost/math/special_functions/detail/gamma_inva.hpp b/include/boost/math/special_functions/detail/gamma_inva.hpp index 75ac89e433..8c3be8ef1a 100644 --- a/include/boost/math/special_functions/detail/gamma_inva.hpp +++ b/include/boost/math/special_functions/detail/gamma_inva.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -17,16 +18,23 @@ #pragma once #endif -#include +#include #include -namespace boost{ namespace math{ namespace detail{ +namespace boost{ namespace math{ + +#ifdef BOOST_MATH_HAS_NVRTC +template +BOOST_MATH_GPU_ENABLED auto erfc_inv(T x, const Policy&); +#endif + +namespace detail{ template struct gamma_inva_t { - gamma_inva_t(T z_, T p_, bool invert_) : z(z_), p(p_), invert(invert_) {} - T operator()(T a) + BOOST_MATH_GPU_ENABLED gamma_inva_t(T z_, T p_, bool invert_) : z(z_), p(p_), invert(invert_) {} + BOOST_MATH_GPU_ENABLED T operator()(T a) { return invert ? 
p - boost::math::gamma_q(a, z, Policy()) : boost::math::gamma_p(a, z, Policy()) - p; } @@ -36,7 +44,7 @@ struct gamma_inva_t }; template -T inverse_poisson_cornish_fisher(T lambda, T p, T q, const Policy& pol) +BOOST_MATH_GPU_ENABLED T inverse_poisson_cornish_fisher(T lambda, T p, T q, const Policy& pol) { BOOST_MATH_STD_USING // mean: @@ -67,7 +75,7 @@ T inverse_poisson_cornish_fisher(T lambda, T p, T q, const Policy& pol) } template -T gamma_inva_imp(const T& z, const T& p, const T& q, const Policy& pol) +BOOST_MATH_GPU_ENABLED T gamma_inva_imp(const T& z, const T& p, const T& q, const Policy& pol) { BOOST_MATH_STD_USING // for ADL of std lib math functions // @@ -151,7 +159,7 @@ T gamma_inva_imp(const T& z, const T& p, const T& q, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_p_inva(T1 x, T2 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -181,7 +189,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_q_inva(T1 x, T2 q, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -211,14 +219,14 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_p_inva(T1 x, T2 p) { return boost::math::gamma_p_inva(x, p, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_q_inva(T1 x, T2 q) { return boost::math::gamma_q_inva(x, q, policies::policy<>()); diff --git a/include/boost/math/special_functions/detail/ibeta_inv_ab.hpp b/include/boost/math/special_functions/detail/ibeta_inv_ab.hpp index 0ce0d7560e..aab18f50f1 100644 --- a/include/boost/math/special_functions/detail/ibeta_inv_ab.hpp +++ b/include/boost/math/special_functions/detail/ibeta_inv_ab.hpp @@ -17,17 +17,19 @@ #pragma once #endif -#include -#include +#include #include +#include +#include +#include namespace boost{ namespace math{ namespace detail{ template struct beta_inv_ab_t { - beta_inv_ab_t(T b_, T z_, T p_, bool invert_, bool swap_ab_) : b(b_), z(z_), p(p_), invert(invert_), swap_ab(swap_ab_) {} - T operator()(T a) + BOOST_MATH_GPU_ENABLED beta_inv_ab_t(T b_, T z_, T p_, bool invert_, bool swap_ab_) : b(b_), z(z_), p(p_), invert(invert_), swap_ab(swap_ab_) {} + BOOST_MATH_GPU_ENABLED T operator()(T a) { return invert ? p - boost::math::ibetac(swap_ab ? b : a, swap_ab ? 
a : b, z, Policy()) @@ -39,7 +41,7 @@ struct beta_inv_ab_t }; template -T inverse_negative_binomial_cornish_fisher(T n, T sf, T sfc, T p, T q, const Policy& pol) +BOOST_MATH_GPU_ENABLED T inverse_negative_binomial_cornish_fisher(T n, T sf, T sfc, T p, T q, const Policy& pol) { BOOST_MATH_STD_USING // mean: @@ -72,7 +74,7 @@ T inverse_negative_binomial_cornish_fisher(T n, T sf, T sfc, T p, T q, const Pol } template -T ibeta_inv_ab_imp(const T& b, const T& z, const T& p, const T& q, bool swap_ab, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ibeta_inv_ab_imp(const T& b, const T& z, const T& p, const T& q, bool swap_ab, const Policy& pol) { BOOST_MATH_STD_USING // for ADL of std lib math functions // @@ -121,11 +123,11 @@ T ibeta_inv_ab_imp(const T& b, const T& z, const T& p, const T& q, bool swap_ab, // if((p < q) != swap_ab) { - guess = (std::min)(T(b * 2), T(1)); + guess = BOOST_MATH_GPU_SAFE_MIN(T(b * 2), T(1)); } else { - guess = (std::min)(T(b / 2), T(1)); + guess = BOOST_MATH_GPU_SAFE_MIN(T(b / 2), T(1)); } } if(n * n * n * u * sf > 0.005) @@ -138,11 +140,11 @@ T ibeta_inv_ab_imp(const T& b, const T& z, const T& p, const T& q, bool swap_ab, // if((p < q) != swap_ab) { - guess = (std::min)(T(b * 2), T(10)); + guess = BOOST_MATH_GPU_SAFE_MIN(T(b * 2), T(10)); } else { - guess = (std::min)(T(b / 2), T(10)); + guess = BOOST_MATH_GPU_SAFE_MIN(T(b / 2), T(10)); } } else @@ -151,8 +153,8 @@ T ibeta_inv_ab_imp(const T& b, const T& z, const T& p, const T& q, bool swap_ab, // // Max iterations permitted: // - std::uintmax_t max_iter = policies::get_max_root_iterations(); - std::pair r = bracket_and_solve_root(f, guess, factor, swap_ab ? true : false, tol, max_iter, pol); + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::pair r = bracket_and_solve_root(f, guess, factor, swap_ab ? 
true : false, tol, max_iter, pol); if(max_iter >= policies::get_max_root_iterations()) return policies::raise_evaluation_error("boost::math::ibeta_invab_imp<%1%>(%1%,%1%,%1%)", "Unable to locate the root within a reasonable number of iterations, closest approximation so far was %1%", r.first, pol); return (r.first + r.second) / 2; @@ -161,7 +163,7 @@ T ibeta_inv_ab_imp(const T& b, const T& z, const T& p, const T& q, bool swap_ab, } // namespace detail template -typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ibeta_inva(RT1 b, RT2 x, RT3 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -173,7 +175,7 @@ typename tools::promote_args::type policies::discrete_quantile<>, policies::assert_undefined<> >::type forwarding_policy; - static const char* function = "boost::math::ibeta_inva<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ibeta_inva<%1%>(%1%,%1%,%1%)"; if(p == 0) { return policies::raise_overflow_error(function, 0, Policy()); @@ -185,28 +187,28 @@ typename tools::promote_args::type return policies::checked_narrowing_cast( detail::ibeta_inv_ab_imp( - static_cast(b), - static_cast(x), - static_cast(p), - static_cast(1 - static_cast(p)), - false, pol), + static_cast(b), + static_cast(x), + static_cast(p), + static_cast(1 - static_cast(p)), + false, pol), function); } template -typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ibetac_inva(RT1 b, RT2 x, RT3 q, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; typedef typename policies::normalise< - Policy, - policies::promote_float, - policies::promote_double, + Policy, + policies::promote_float, + policies::promote_double, policies::discrete_quantile<>, policies::assert_undefined<> >::type forwarding_policy; - static const char* function = "boost::math::ibetac_inva<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ibetac_inva<%1%>(%1%,%1%,%1%)"; if(q == 1) { return policies::raise_overflow_error(function, 0, Policy()); @@ -218,28 +220,28 @@ typename tools::promote_args::type return policies::checked_narrowing_cast( detail::ibeta_inv_ab_imp( - static_cast(b), - static_cast(x), - static_cast(1 - static_cast(q)), - static_cast(q), + static_cast(b), + static_cast(x), + static_cast(1 - static_cast(q)), + static_cast(q), false, pol), function); } template -typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ibeta_invb(RT1 a, RT2 x, RT3 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; typedef typename policies::normalise< - Policy, - policies::promote_float, - policies::promote_double, + Policy, + policies::promote_float, + policies::promote_double, policies::discrete_quantile<>, policies::assert_undefined<> >::type forwarding_policy; - static const char* function = "boost::math::ibeta_invb<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ibeta_invb<%1%>(%1%,%1%,%1%)"; if(p == 0) { return tools::min_value(); @@ -251,19 +253,19 @@ typename tools::promote_args::type return policies::checked_narrowing_cast( detail::ibeta_inv_ab_imp( - static_cast(a), - static_cast(x), - static_cast(p), - static_cast(1 - static_cast(p)), + static_cast(a), + static_cast(x), + static_cast(p), + static_cast(1 - static_cast(p)), true, pol), function); } template -typename tools::promote_args::type 
+BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ibetac_invb(RT1 a, RT2 x, RT3 q, const Policy& pol) { - static const char* function = "boost::math::ibeta_invb<%1%>(%1%, %1%, %1%)"; + constexpr auto function = "boost::math::ibeta_invb<%1%>(%1%, %1%, %1%)"; typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; typedef typename policies::normalise< @@ -293,28 +295,28 @@ typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_inva(RT1 b, RT2 x, RT3 p) { return boost::math::ibeta_inva(b, x, p, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibetac_inva(RT1 b, RT2 x, RT3 q) { return boost::math::ibetac_inva(b, x, q, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_invb(RT1 a, RT2 x, RT3 p) { return boost::math::ibeta_invb(a, x, p, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibetac_invb(RT1 a, RT2 x, RT3 q) { return boost::math::ibetac_invb(a, x, q, policies::policy<>()); diff --git a/include/boost/math/special_functions/detail/ibeta_inverse.hpp b/include/boost/math/special_functions/detail/ibeta_inverse.hpp index 70f17a0b1a..6f222cf77d 100644 --- a/include/boost/math/special_functions/detail/ibeta_inverse.hpp +++ b/include/boost/math/special_functions/detail/ibeta_inverse.hpp @@ -11,12 +11,14 @@ #pragma once #endif +#include +#include +#include +#include #include #include -#include #include #include -#include namespace boost{ namespace math{ namespace detail{ @@ -27,12 +29,12 @@ namespace boost{ namespace math{ namespace detail{ template struct temme_root_finder { - temme_root_finder(const T t_, const T a_) : t(t_), a(a_) { + BOOST_MATH_GPU_ENABLED temme_root_finder(const T t_, const T a_) : t(t_), a(a_) { BOOST_MATH_ASSERT( math::tools::epsilon() <= a && !(boost::math::isinf)(a)); } - boost::math::tuple operator()(T x) + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(T x) { BOOST_MATH_STD_USING // ADL of std names @@ -52,7 +54,7 @@ struct temme_root_finder // Section 2. // template -T temme_method_1_ibeta_inverse(T a, T b, T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED T temme_method_1_ibeta_inverse(T a, T b, T z, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names @@ -138,7 +140,7 @@ T temme_method_1_ibeta_inverse(T a, T b, T z, const Policy& pol) // Section 3. // template -T temme_method_2_ibeta_inverse(T /*a*/, T /*b*/, T z, T r, T theta, const Policy& pol) +BOOST_MATH_GPU_ENABLED T temme_method_2_ibeta_inverse(T /*a*/, T /*b*/, T z, T r, T theta, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names @@ -302,9 +304,23 @@ T temme_method_2_ibeta_inverse(T /*a*/, T /*b*/, T z, T r, T theta, const Policy // // And iterate: // - x = tools::newton_raphson_iterate( - temme_root_finder(-lu, alpha), x, lower, upper, policies::digits() / 2); - +#ifndef BOOST_MATH_NO_EXCEPTIONS + try { +#endif + x = tools::newton_raphson_iterate( + temme_root_finder(-lu, alpha), x, lower, upper, policies::digits() / 2); +#ifndef BOOST_MATH_NO_EXCEPTIONS + } + catch (const boost::math::evaluation_error&) + { + // Due to numerical instability we may have cases where no root is found when + // in fact we should just touch the origin. 
We simply ignore the error here + // and return our best guess for x so far... + // Maybe we should special case the symmetrical parameter case, but it's not clear + // whether that is the only situation when problems can occur. + // See https://github.com/boostorg/math/issues/1169 + } +#endif return x; } // @@ -315,10 +331,11 @@ T temme_method_2_ibeta_inverse(T /*a*/, T /*b*/, T z, T r, T theta, const Policy // Section 4. // template -T temme_method_3_ibeta_inverse(T a, T b, T p, T q, const Policy& pol) +BOOST_MATH_GPU_ENABLED T temme_method_3_ibeta_inverse(T a, T b, T p, T q, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std names + // // Begin by getting an initial approximation for the quantity // eta from the dominant part of the incomplete beta: @@ -420,10 +437,10 @@ T temme_method_3_ibeta_inverse(T a, T b, T p, T q, const Policy& pol) template struct ibeta_roots { - ibeta_roots(T _a, T _b, T t, bool inv = false) + BOOST_MATH_GPU_ENABLED ibeta_roots(T _a, T _b, T t, bool inv = false) : a(_a), b(_b), target(t), invert(inv) {} - boost::math::tuple operator()(T x) + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(T x) { BOOST_MATH_STD_USING // ADL of std names @@ -457,7 +474,7 @@ struct ibeta_roots }; template -T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) +BOOST_MATH_GPU_ENABLED T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) { BOOST_MATH_STD_USING // For ADL of math functions. @@ -487,8 +504,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) return p; } // Change things around so we can handle as b == 1 special case below: - std::swap(a, b); - std::swap(p, q); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); invert = true; } // @@ -524,8 +541,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) } else if(b > 0.5f) { - std::swap(a, b); - std::swap(p, q); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); invert = !invert; } } @@ -559,7 +576,7 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) y = -boost::math::expm1(boost::math::log1p(-q, pol) / a, pol); } if(invert) - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(x, y); if(py) *py = y; return x; @@ -574,12 +591,12 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) // if(p > 0.5) { - std::swap(a, b); - std::swap(p, q); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); invert = !invert; } - T minv = (std::min)(a, b); - T maxv = (std::max)(a, b); + T minv = BOOST_MATH_GPU_SAFE_MIN(a, b); + T maxv = BOOST_MATH_GPU_SAFE_MAX(a, b); if((sqrt(minv) > (maxv - minv)) && (minv > 5)) { // @@ -630,8 +647,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) // if(a < b) { - std::swap(a, b); - std::swap(p, q); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); invert = !invert; } // @@ -694,8 +711,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) } if(fs < 0) { - std::swap(a, b); - std::swap(p, q); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); invert = !invert; xs = 1 - xs; } @@ -758,9 +775,9 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) if(ps < 0) { - std::swap(a, b); - std::swap(p, q); - std::swap(xs, xs2); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); + BOOST_MATH_GPU_SAFE_SWAP(xs, xs2); invert = !invert; } // @@ -823,8 +840,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) // if(b < a) { - std::swap(a, b); - std::swap(p, q); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + 
BOOST_MATH_GPU_SAFE_SWAP(p, q); invert = !invert; } if (a < tools::min_value()) @@ -890,9 +907,9 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) // if(x > 0.5) { - std::swap(a, b); - std::swap(p, q); - std::swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(a, b); + BOOST_MATH_GPU_SAFE_SWAP(p, q); + BOOST_MATH_GPU_SAFE_SWAP(x, y); invert = !invert; T l = 1 - upper; T u = 1 - lower; @@ -922,8 +939,8 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) if(x < lower) x = lower; } - std::uintmax_t max_iter = policies::get_max_root_iterations(); - std::uintmax_t max_iter_used = 0; + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::uintmax_t max_iter_used = 0; // // Figure out how many digits to iterate towards: // @@ -946,7 +963,13 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) // Now iterate, we can use either p or q as the target here // depending on which is smaller: // + // Since we can't use halley_iterate on device we use newton raphson + // + #ifndef BOOST_MATH_HAS_GPU_SUPPORT x = boost::math::tools::halley_iterate( + #else + x = boost::math::tools::newton_raphson_iterate( + #endif boost::math::detail::ibeta_roots(a, b, (p < q ? p : q), (p < q ? false : true)), x, lower, upper, digits, max_iter); policies::check_root_iterations("boost::math::ibeta<%1%>(%1%, %1%, %1%)", max_iter + max_iter_used, pol); // @@ -968,10 +991,10 @@ T ibeta_inv_imp(T a, T b, T p, T q, const Policy& pol, T* py) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_inv(T1 a, T2 b, T3 p, T4* py, const Policy& pol) { - static const char* function = "boost::math::ibeta_inv<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ibeta_inv<%1%>(%1%,%1%,%1%)"; BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -1003,14 +1026,14 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_inv(T1 a, T2 b, T3 p, T4* py) { return ibeta_inv(a, b, p, py, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_inv(T1 a, T2 b, T3 p) { typedef typename tools::promote_args::type result_type; @@ -1018,7 +1041,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibeta_inv(T1 a, T2 b, T3 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -1026,10 +1049,10 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibetac_inv(T1 a, T2 b, T3 q, T4* py, const Policy& pol) { - static const char* function = "boost::math::ibetac_inv<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ibetac_inv<%1%>(%1%,%1%,%1%)"; BOOST_FPU_EXCEPTION_GUARD typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -1061,14 +1084,14 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ibetac_inv(T1 a, T2 b, T3 q, T4* py) { return ibetac_inv(a, b, q, py, policies::policy<>()); } 
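// [Reviewer note] The hunk above falls back from halley_iterate to
// newton_raphson_iterate when BOOST_MATH_HAS_GPU_SUPPORT is defined.  A
// minimal, self-contained sketch of the two update rules being traded off
// (illustrative only -- not the library's root finders), solving
// f(x) = x*x - a:

#include <cstdio>

double newton_step(double x, double a)
{
   const double f  = x * x - a;                       // f(x)
   const double f1 = 2 * x;                           // f'(x)
   return x - f / f1;                                 // Newton: x - f/f'
}

double halley_step(double x, double a)
{
   const double f  = x * x - a;
   const double f1 = 2 * x;
   const double f2 = 2;                               // f''(x)
   return x - (2 * f * f1) / (2 * f1 * f1 - f * f2);  // Halley: cubic convergence
}

int main()
{
   double xn = 1.5, xh = 1.5;                         // same starting guess
   for (int i = 0; i < 5; ++i) { xn = newton_step(xn, 2.0); xh = halley_step(xh, 2.0); }
   std::printf("newton: %.17g\nhalley: %.17g\n", xn, xh); // both converge to sqrt(2)
}

// Halley trades an extra derivative evaluation for fewer iterations; dropping
// to Newton-Raphson on device avoids the host-only halley_iterate machinery.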
 template <class RT1, class RT2, class RT3>
-inline typename tools::promote_args<RT1, RT2, RT3>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<RT1, RT2, RT3>::type
    ibetac_inv(RT1 a, RT2 b, RT3 q)
 {
    typedef typename tools::promote_args<RT1, RT2, RT3>::type result_type;
@@ -1076,7 +1099,7 @@ inline typename tools::promote_args<RT1, RT2, RT3>::type
 }
 
 template <class RT1, class RT2, class RT3, class Policy>
-inline typename tools::promote_args<RT1, RT2, RT3>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<RT1, RT2, RT3>::type
    ibetac_inv(RT1 a, RT2 b, RT3 q, const Policy& pol)
 {
    typedef typename tools::promote_args<RT1, RT2, RT3>::type result_type;
diff --git a/include/boost/math/special_functions/detail/iconv.hpp b/include/boost/math/special_functions/detail/iconv.hpp
index 90b4aa9381..20889d411e 100644
--- a/include/boost/math/special_functions/detail/iconv.hpp
+++ b/include/boost/math/special_functions/detail/iconv.hpp
@@ -10,28 +10,29 @@
 #pragma once
 #endif
 
-#include <type_traits>
+#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/type_traits.hpp>
 #include <boost/math/special_functions/round.hpp>
 
 namespace boost { namespace math { namespace detail{
 
 template <class T, class Policy>
-inline int iconv_imp(T v, Policy const&, std::true_type const&)
+BOOST_MATH_GPU_ENABLED inline int iconv_imp(T v, Policy const&, boost::math::true_type const&)
 {
    return static_cast<int>(v);
 }
 
 template <class T, class Policy>
-inline int iconv_imp(T v, Policy const& pol, std::false_type const&)
+BOOST_MATH_GPU_ENABLED inline int iconv_imp(T v, Policy const& pol, boost::math::false_type const&)
 {
    BOOST_MATH_STD_USING
    return iround(v, pol);
 }
 
 template <class T, class Policy>
-inline int iconv(T v, Policy const& pol)
+BOOST_MATH_GPU_ENABLED inline int iconv(T v, Policy const& pol)
 {
-   typedef typename std::is_convertible<T, int>::type tag_type;
+   typedef typename boost::math::is_convertible<T, int>::type tag_type;
    return iconv_imp(v, pol, tag_type());
 }
diff --git a/include/boost/math/special_functions/detail/igamma_inverse.hpp b/include/boost/math/special_functions/detail/igamma_inverse.hpp
index f6bbcd72d5..4efd4f78a3 100644
--- a/include/boost/math/special_functions/detail/igamma_inverse.hpp
+++ b/include/boost/math/special_functions/detail/igamma_inverse.hpp
@@ -1,4 +1,5 @@
 // (C) Copyright John Maddock 2006.
+// (C) Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,6 +11,8 @@ #pragma once #endif +#include +#include #include #include #include @@ -21,7 +24,7 @@ namespace boost{ namespace math{ namespace detail{ template -T find_inverse_s(T p, T q) +BOOST_MATH_GPU_ENABLED T find_inverse_s(T p, T q) { // // Computation of the Incomplete Gamma Function Ratios and their Inverse @@ -41,8 +44,8 @@ T find_inverse_s(T p, T q) { t = sqrt(-2 * log(q)); } - static const double a[4] = { 3.31125922108741, 11.6616720288968, 4.28342155967104, 0.213623493715853 }; - static const double b[5] = { 1, 6.61053765625462, 6.40691597760039, 1.27364489782223, 0.3611708101884203e-1 }; + BOOST_MATH_STATIC const double a[4] = { 3.31125922108741, 11.6616720288968, 4.28342155967104, 0.213623493715853 }; + BOOST_MATH_STATIC const double b[5] = { 1, 6.61053765625462, 6.40691597760039, 1.27364489782223, 0.3611708101884203e-1 }; T s = t - tools::evaluate_polynomial(a, t) / tools::evaluate_polynomial(b, t); if(p < T(0.5)) s = -s; @@ -50,7 +53,7 @@ T find_inverse_s(T p, T q) } template -T didonato_SN(T a, T x, unsigned N, T tolerance = 0) +BOOST_MATH_GPU_ENABLED T didonato_SN(T a, T x, unsigned N, T tolerance = 0) { // // Computation of the Incomplete Gamma Function Ratios and their Inverse @@ -77,7 +80,7 @@ T didonato_SN(T a, T x, unsigned N, T tolerance = 0) } template -inline T didonato_FN(T p, T a, T x, unsigned N, T tolerance, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T didonato_FN(T p, T a, T x, unsigned N, T tolerance, const Policy& pol) { // // Computation of the Incomplete Gamma Function Ratios and their Inverse @@ -93,7 +96,7 @@ inline T didonato_FN(T p, T a, T x, unsigned N, T tolerance, const Policy& pol) } template -T find_inverse_gamma(T a, T p, T q, const Policy& pol, bool* p_has_10_digits) +BOOST_MATH_GPU_ENABLED T find_inverse_gamma(T a, T p, T q, const Policy& pol, bool* p_has_10_digits) { // // In order to understand what's going on here, you will @@ -233,7 +236,7 @@ T find_inverse_gamma(T a, T p, T q, const Policy& pol, bool* p_has_10_digits) } else { - T D = (std::max)(T(2), T(a * (a - 1))); + T D = BOOST_MATH_GPU_SAFE_MAX(T(2), T(a * (a - 1))); T lg = boost::math::lgamma(a, pol); T lb = log(q) + lg; if(lb < -D * T(2.3)) @@ -315,7 +318,7 @@ T find_inverse_gamma(T a, T p, T q, const Policy& pol, bool* p_has_10_digits) template struct gamma_p_inverse_func { - gamma_p_inverse_func(T a_, T p_, bool inv) : a(a_), p(p_), invert(inv) + BOOST_MATH_GPU_ENABLED gamma_p_inverse_func(T a_, T p_, bool inv) : a(a_), p(p_), invert(inv) { // // If p is too near 1 then P(x) - p suffers from cancellation @@ -333,7 +336,7 @@ struct gamma_p_inverse_func } } - boost::math::tuple operator()(const T& x)const + BOOST_MATH_GPU_ENABLED boost::math::tuple operator()(const T& x)const { BOOST_FPU_EXCEPTION_GUARD // @@ -395,11 +398,11 @@ struct gamma_p_inverse_func }; template -T gamma_p_inv_imp(T a, T p, const Policy& pol) +BOOST_MATH_GPU_ENABLED T gamma_p_inv_imp(T a, T p, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std functions. 
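// [Reviewer note] gamma_p_inverse_func above packages the target function
// together with its derivatives so that one evaluation feeds each iteration
// step.  A hedged sketch of that functor shape for a simpler target,
// g(x) = x^3 - c (std::tuple stands in for boost::math::tuple here):

#include <tuple>

template <class T>
struct cube_root_func
{
   explicit cube_root_func(T c) : c_(c) {}

   // Return (g, g', g''): Halley consumes all three elements,
   // Newton-Raphson only the first two.
   std::tuple<T, T, T> operator()(const T& x) const
   {
      return std::make_tuple(x * x * x - c_, 3 * x * x, 6 * x);
   }

private:
   T c_;
};

// An object of this shape is what halley_iterate / newton_raphson_iterate
// receive as their first argument in the code above.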
- static const char* function = "boost::math::gamma_p_inv<%1%>(%1%, %1%)"; + constexpr auto function = "boost::math::gamma_p_inv<%1%>(%1%, %1%)"; BOOST_MATH_INSTRUMENT_VARIABLE(a); BOOST_MATH_INSTRUMENT_VARIABLE(p); @@ -442,7 +445,9 @@ T gamma_p_inv_imp(T a, T p, const Policy& pol) // // Go ahead and iterate: // - std::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); + + #ifndef BOOST_MATH_HAS_GPU_SUPPORT guess = tools::halley_iterate( detail::gamma_p_inverse_func(a, p, false), guess, @@ -450,6 +455,16 @@ T gamma_p_inv_imp(T a, T p, const Policy& pol) tools::max_value(), digits, max_iter); + #else + guess = tools::newton_raphson_iterate( + detail::gamma_p_inverse_func(a, p, false), + guess, + lower, + tools::max_value(), + digits, + max_iter); + #endif + policies::check_root_iterations(function, max_iter, pol); BOOST_MATH_INSTRUMENT_VARIABLE(guess); if(guess == lower) @@ -458,11 +473,11 @@ T gamma_p_inv_imp(T a, T p, const Policy& pol) } template -T gamma_q_inv_imp(T a, T q, const Policy& pol) +BOOST_MATH_GPU_ENABLED T gamma_q_inv_imp(T a, T q, const Policy& pol) { BOOST_MATH_STD_USING // ADL of std functions. - static const char* function = "boost::math::gamma_q_inv<%1%>(%1%, %1%)"; + constexpr auto function = "boost::math::gamma_q_inv<%1%>(%1%, %1%)"; if(a <= 0) return policies::raise_domain_error(function, "Argument a in the incomplete gamma function inverse must be >= 0 (got a=%1%).", a, pol); @@ -501,7 +516,9 @@ T gamma_q_inv_imp(T a, T q, const Policy& pol) // // Go ahead and iterate: // - std::uintmax_t max_iter = policies::get_max_root_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_root_iterations(); + + #ifndef BOOST_MATH_HAS_GPU_SUPPORT guess = tools::halley_iterate( detail::gamma_p_inverse_func(a, q, true), guess, @@ -509,6 +526,16 @@ T gamma_q_inv_imp(T a, T q, const Policy& pol) tools::max_value(), digits, max_iter); + #else + guess = tools::newton_raphson_iterate( + detail::gamma_p_inverse_func(a, q, true), + guess, + lower, + tools::max_value(), + digits, + max_iter); + #endif + policies::check_root_iterations(function, max_iter, pol); if(guess == lower) guess = policies::raise_underflow_error(function, "Expected result known to be non-zero, but is smaller than the smallest available number.", pol); @@ -518,7 +545,7 @@ T gamma_q_inv_imp(T a, T q, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_p_inv(T1 a, T2 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -528,7 +555,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_q_inv(T1 a, T2 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -538,14 +565,14 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_p_inv(T1 a, T2 p) { return gamma_p_inv(a, p, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type gamma_q_inv(T1 a, T2 p) { return gamma_q_inv(a, p, policies::policy<>()); diff --git a/include/boost/math/special_functions/detail/igamma_large.hpp b/include/boost/math/special_functions/detail/igamma_large.hpp index 
5483b53fb6..8e0ad1b0dd 100644 --- a/include/boost/math/special_functions/detail/igamma_large.hpp +++ b/include/boost/math/special_functions/detail/igamma_large.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2006. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -59,13 +60,16 @@ #pragma GCC system_header #endif +#include +#include + namespace boost{ namespace math{ namespace detail{ // This version will never be called (at runtime), it's a stub used // when T is unsuitable to be passed to these routines: // template -inline T igamma_temme_large(T, T, const Policy& /* pol */, std::integral_constant const *) +BOOST_MATH_GPU_ENABLED inline T igamma_temme_large(T, T, const Policy& /* pol */, const boost::math::integral_constant&) { // stub function, should never actually be called BOOST_MATH_ASSERT(0); @@ -75,8 +79,11 @@ inline T igamma_temme_large(T, T, const Policy& /* pol */, std::integral_constan // This version is accurate for up to 64-bit mantissa's, // (80-bit long double, or 10^-20). // + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT + template -T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant const *) +BOOST_MATH_GPU_ENABLED T igamma_temme_large(T a, T x, const Policy& pol, const boost::math::integral_constant&) { BOOST_MATH_STD_USING // ADL of std functions T sigma = (x - a) / a; @@ -88,7 +95,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant -T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant const *) +BOOST_MATH_GPU_ENABLED T igamma_temme_large(T a, T x, const Policy& pol, const boost::math::integral_constant&) { BOOST_MATH_STD_USING // ADL of std functions T sigma = (x - a) / a; @@ -293,7 +303,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.33333333333333333L), static_cast(0.083333333333333333L), static_cast(-0.014814814814814815L), @@ -312,7 +322,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.0018518518518518519L), static_cast(-0.0034722222222222222L), static_cast(0.0026455026455026455L), @@ -329,7 +339,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(0.0041335978835978836L), static_cast(-0.0026813271604938272L), static_cast(0.00077160493827160494L), @@ -344,7 +354,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(0.00064943415637860082L), static_cast(0.00022947209362139918L), static_cast(-0.00046918949439525571L), @@ -357,7 +367,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.0008618882909167117L), static_cast(0.00078403922172006663L), static_cast(-0.00029907248030319018L), @@ -368,7 +378,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.00033679855336635815L), static_cast(-0.69728137583658578e-4L), static_cast(0.00027727532449593921L), @@ -381,7 +391,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(0.00053130793646399222L), static_cast(-0.00059216643735369388L), static_cast(0.00027087820967180448L), @@ -392,7 +402,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(0.00034436760689237767L), static_cast(0.51717909082605922e-4L), static_cast(-0.00033493161081142236L), @@ -401,7 +411,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.00065262391859530942L), 
static_cast(0.00083949872067208728L), static_cast(-0.00043829709854172101L), @@ -414,7 +424,18 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant) + { + result += ::erfcf(::sqrtf(y)) / 2; + } + else + { + result += ::erfc(::sqrt(y)) / 2; + } + #else result += boost::math::erfc(sqrt(y), pol) / 2; + #endif return result; } @@ -423,7 +444,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant -T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant const *) +BOOST_MATH_GPU_ENABLED T igamma_temme_large(T a, T x, const Policy& pol, const boost::math::integral_constant&) { BOOST_MATH_STD_USING // ADL of std functions T sigma = (x - a) / a; @@ -435,7 +456,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.333333333L), static_cast(0.0833333333L), static_cast(-0.0148148148L), @@ -446,7 +467,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(-0.00185185185L), static_cast(-0.00347222222L), static_cast(0.00264550265L), @@ -455,7 +476,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant(0.00413359788L), static_cast(-0.00268132716L), static_cast(0.000771604938L), @@ -467,7 +488,18 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant) + { + result += ::erfcf(::sqrtf(y)) / 2; + } + else + { + result += ::erfc(::sqrt(y)) / 2; + } + #else result += boost::math::erfc(sqrt(y), pol) / 2; + #endif return result; } @@ -478,8 +510,10 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant -T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant const *) +BOOST_MATH_GPU_ENABLED T igamma_temme_large(T a, T x, const Policy& pol, const boost::math::integral_constant&) { BOOST_MATH_STD_USING // ADL of std functions T sigma = (x - a) / a; @@ -491,7 +525,7 @@ T igamma_temme_large(T a, T x, const Policy& pol, std::integral_constant #include +#include +#include +#include #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -28,15 +33,15 @@ namespace boost{ namespace math{ namespace detail{ // These need forward declaring to keep GCC happy: // template -T gamma_imp(T z, const Policy& pol, const Lanczos& l); +BOOST_MATH_GPU_ENABLED T gamma_imp(T z, const Policy& pol, const Lanczos& l); template -T gamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos& l); +BOOST_MATH_GPU_ENABLED T gamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos& l); // // lgamma for small arguments: // template -T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, const Policy& /* l */, const Lanczos&) +BOOST_MATH_GPU_ENABLED T lgamma_small_imp(T z, T zm1, T zm2, const boost::math::integral_constant&, const Policy& /* l */, const Lanczos&) { // This version uses rational approximations for small // values of z accurate enough for 64-bit mantissas @@ -87,7 +92,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co // At long double: Max error found: 1.987e-21 // Maximum Deviation Found (approximation error): 5.900e-24 // - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.180355685678449379109e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.25126649619989678683e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.494103151567532234274e-1)), @@ -96,7 +101,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.541009869215204396339e-3)), 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.324588649825948492091e-4)) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.196202987197795200688e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.148019669424231326694e1)), @@ -107,7 +112,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.223352763208617092964e-6)) }; - static const float Y = 0.158963680267333984375e0f; + constexpr float Y = 0.158963680267333984375e0f; T r = zm2 * (z + 1); T R = tools::evaluate_polynomial(P, zm2); @@ -152,9 +157,9 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co // Expected Error Term: 3.139e-021 // - static const float Y = 0.52815341949462890625f; + constexpr float Y = 0.52815341949462890625f; - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.490622454069039543534e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.969117530159521214579e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.414983358359495381969e0)), @@ -163,7 +168,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.240149820648571559892e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.100346687696279557415e-2)) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.302349829846463038743e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.348739585360723852576e1)), @@ -197,9 +202,9 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co // Maximum Deviation Found: 2.151e-021 // Expected Error Term: 2.150e-021 // - static const float Y = 0.452017307281494140625f; + constexpr float Y = 0.452017307281494140625f; - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.292329721830270012337e-1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.144216267757192309184e0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.142440390738631274135e0)), @@ -207,7 +212,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.850535976868336437746e-2)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.431171342679297331241e-3)) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -0.150169356054485044494e1)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.846973248876495016101e0)), @@ -224,8 +229,10 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, co } return result; } + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT template -T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, const Policy& /* l */, const Lanczos&) +T lgamma_small_imp(T z, T zm1, T zm2, const boost::math::integral_constant&, const Policy& /* l */, const Lanczos&) { // // This version uses rational approximations for small @@ -482,7 +489,7 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, c return result; } template -T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, const Policy& pol, const Lanczos& l) +BOOST_MATH_GPU_ENABLED T lgamma_small_imp(T z, T zm1, T zm2, const boost::math::integral_constant&, const Policy& pol, const Lanczos& l) { // // No rational 
approximations are available because either @@ -526,6 +533,8 @@ T lgamma_small_imp(T z, T zm1, T zm2, const std::integral_constant&, con return result; } +#endif // BOOST_MATH_HAS_GPU_SUPPORT + }}} // namespaces #endif // BOOST_MATH_SPECIAL_FUNCTIONS_DETAIL_LGAMMA_SMALL diff --git a/include/boost/math/special_functions/detail/round_fwd.hpp b/include/boost/math/special_functions/detail/round_fwd.hpp index c58459e36d..7d69f8b9c5 100644 --- a/include/boost/math/special_functions/detail/round_fwd.hpp +++ b/include/boost/math/special_functions/detail/round_fwd.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2008. +// Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -21,53 +22,53 @@ namespace boost { template - typename tools::promote_args::type trunc(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename tools::promote_args::type trunc(const T& v, const Policy& pol); template - typename tools::promote_args::type trunc(const T& v); + BOOST_MATH_GPU_ENABLED typename tools::promote_args::type trunc(const T& v); template - int itrunc(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED int itrunc(const T& v, const Policy& pol); template - int itrunc(const T& v); + BOOST_MATH_GPU_ENABLED int itrunc(const T& v); template - long ltrunc(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED long ltrunc(const T& v, const Policy& pol); template - long ltrunc(const T& v); + BOOST_MATH_GPU_ENABLED long ltrunc(const T& v); template - long long lltrunc(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED long long lltrunc(const T& v, const Policy& pol); template - long long lltrunc(const T& v); + BOOST_MATH_GPU_ENABLED long long lltrunc(const T& v); template - typename tools::promote_args::type round(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename tools::promote_args::type round(const T& v, const Policy& pol); template - typename tools::promote_args::type round(const T& v); + BOOST_MATH_GPU_ENABLED typename tools::promote_args::type round(const T& v); template - int iround(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED int iround(const T& v, const Policy& pol); template - int iround(const T& v); + BOOST_MATH_GPU_ENABLED int iround(const T& v); template - long lround(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED long lround(const T& v, const Policy& pol); template - long lround(const T& v); + BOOST_MATH_GPU_ENABLED long lround(const T& v); template - long long llround(const T& v, const Policy& pol); + BOOST_MATH_GPU_ENABLED long long llround(const T& v, const Policy& pol); template - long long llround(const T& v); + BOOST_MATH_GPU_ENABLED long long llround(const T& v); template - T modf(const T& v, T* ipart, const Policy& pol); + BOOST_MATH_GPU_ENABLED T modf(const T& v, T* ipart, const Policy& pol); template - T modf(const T& v, T* ipart); + BOOST_MATH_GPU_ENABLED T modf(const T& v, T* ipart); template - T modf(const T& v, int* ipart, const Policy& pol); + BOOST_MATH_GPU_ENABLED T modf(const T& v, int* ipart, const Policy& pol); template - T modf(const T& v, int* ipart); + BOOST_MATH_GPU_ENABLED T modf(const T& v, int* ipart); template - T modf(const T& v, long* ipart, const Policy& pol); + BOOST_MATH_GPU_ENABLED T modf(const T& v, long* ipart, const Policy& pol); template - T modf(const T& v, long* ipart); + BOOST_MATH_GPU_ENABLED T modf(const T& v, long* ipart); template - T modf(const T& v, long long* ipart, const Policy& pol); + BOOST_MATH_GPU_ENABLED T modf(const T& v, 
   long long* ipart, const Policy& pol);
   template <class T>
-   T modf(const T& v, long long* ipart);
+   BOOST_MATH_GPU_ENABLED T modf(const T& v, long long* ipart);
 
   }
 }
diff --git a/include/boost/math/special_functions/detail/t_distribution_inv.hpp b/include/boost/math/special_functions/detail/t_distribution_inv.hpp
index 9209b6d405..79a29a0274 100644
--- a/include/boost/math/special_functions/detail/t_distribution_inv.hpp
+++ b/include/boost/math/special_functions/detail/t_distribution_inv.hpp
@@ -11,6 +11,9 @@
 #pragma once
 #endif
 
+#include
+#include
+#include
 #include
 #include
 #include
@@ -24,7 +27,7 @@ namespace boost{ namespace math{ namespace detail{
 // Communications of the ACM, 13(10): 619-620, Oct., 1970.
 //
 template <class T, class Policy>
-T inverse_students_t_hill(T ndf, T u, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T inverse_students_t_hill(T ndf, T u, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    BOOST_MATH_ASSERT(u <= 0.5);
@@ -74,7 +77,7 @@
 // Journal of Computational Finance, Vol 9 Issue 4, pp 37-73, Summer 2006
 //
 template <class T, class Policy>
-T inverse_students_t_tail_series(T df, T v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T inverse_students_t_tail_series(T df, T v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    // Tail series expansion, see section 6 of Shaw's paper.
@@ -125,7 +128,7 @@
 }
 
 template <class T, class Policy>
-T inverse_students_t_body_series(T df, T u, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T inverse_students_t_body_series(T df, T u, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    //
@@ -204,7 +207,7 @@
 }
 
 template <class T, class Policy>
-T inverse_students_t(T df, T u, T v, const Policy& pol, bool* pexact = nullptr)
+BOOST_MATH_GPU_ENABLED T inverse_students_t(T df, T u, T v, const Policy& pol, bool* pexact = nullptr)
 {
    //
    // df = number of degrees of freedom.
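// [Reviewer note] These detail:: routines back the public Student's t
// quantile.  A host-only usage sketch against the documented distribution
// API, just to anchor what is being inverted (not part of the device path):

#include <boost/math/distributions/students_t.hpp>
#include <cstdio>

int main()
{
   boost::math::students_t dist(5);                // df = 5 degrees of freedom
   double t = boost::math::quantile(dist, 0.975);  // inverse CDF
   std::printf("t_{0.975, df=5} = %g\n", t);       // ~2.5706
}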
@@ -220,7 +223,7 @@ T inverse_students_t(T df, T u, T v, const Policy& pol, bool* pexact = nullptr) if(u > v) { // function is symmetric, invert it: - std::swap(u, v); + BOOST_MATH_GPU_SAFE_SWAP(u, v); invert = true; } if((floor(df) == df) && (df < 20)) @@ -416,7 +419,7 @@ T inverse_students_t(T df, T u, T v, const Policy& pol, bool* pexact = nullptr) } template -inline T find_ibeta_inv_from_t_dist(T a, T p, T /*q*/, T* py, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T find_ibeta_inv_from_t_dist(T a, T p, T /*q*/, T* py, const Policy& pol) { T u = p / 2; T v = 1 - u; @@ -426,8 +429,21 @@ inline T find_ibeta_inv_from_t_dist(T a, T p, T /*q*/, T* py, const Policy& pol) return df / (df + t * t); } +// NVRTC requires this forward decl because there is a header cycle between here and ibeta_inverse.hpp +#ifdef BOOST_MATH_HAS_NVRTC + +} // Namespace detail + +template +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type + ibeta_inv(T1 a, T2 b, T3 p, T4* py, const Policy& pol); + +namespace detail { + +#endif + template -inline T fast_students_t_quantile_imp(T df, T p, const Policy& pol, const std::false_type*) +BOOST_MATH_GPU_ENABLED inline T fast_students_t_quantile_imp(T df, T p, const Policy& pol, const boost::math::false_type*) { BOOST_MATH_STD_USING // @@ -450,12 +466,12 @@ inline T fast_students_t_quantile_imp(T df, T p, const Policy& pol, const std::f } template -T fast_students_t_quantile_imp(T df, T p, const Policy& pol, const std::true_type*) +BOOST_MATH_GPU_ENABLED T fast_students_t_quantile_imp(T df, T p, const Policy& pol, const boost::math::true_type*) { BOOST_MATH_STD_USING bool invert = false; if((df < 2) && (floor(df) != df)) - return boost::math::detail::fast_students_t_quantile_imp(df, p, pol, static_cast(nullptr)); + return boost::math::detail::fast_students_t_quantile_imp(df, p, pol, static_cast(nullptr)); if(p > 0.5) { p = 1 - p; @@ -521,7 +537,7 @@ T fast_students_t_quantile_imp(T df, T p, const Policy& pol, const std::true_typ } template -inline T fast_students_t_quantile(T df, T p, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T fast_students_t_quantile(T df, T p, const Policy& pol) { typedef typename policies::evaluation::type value_type; typedef typename policies::normalise< @@ -531,12 +547,12 @@ inline T fast_students_t_quantile(T df, T p, const Policy& pol) policies::discrete_quantile<>, policies::assert_undefined<> >::type forwarding_policy; - typedef std::integral_constant::digits <= 53) + typedef boost::math::integral_constant::digits <= 53) && - (std::numeric_limits::is_specialized) + (boost::math::numeric_limits::is_specialized) && - (std::numeric_limits::radix == 2) + (boost::math::numeric_limits::radix == 2) > tag_type; return policies::checked_narrowing_cast(fast_students_t_quantile_imp(static_cast(df), static_cast(p), pol, static_cast(nullptr)), "boost::math::students_t_quantile<%1%>(%1%,%1%,%1%)"); } diff --git a/include/boost/math/special_functions/detail/unchecked_factorial.hpp b/include/boost/math/special_functions/detail/unchecked_factorial.hpp index b528a24fe9..92481f2c6e 100644 --- a/include/boost/math/special_functions/detail/unchecked_factorial.hpp +++ b/include/boost/math/special_functions/detail/unchecked_factorial.hpp @@ -10,19 +10,23 @@ #pragma once #endif -#ifdef _MSC_VER -#pragma warning(push) // Temporary until lexical cast fixed. 
-#pragma warning(disable: 4127 4701) -#endif -#include -#ifdef _MSC_VER -#pragma warning(pop) -#endif -#include +#include +#include +#include +#include #include -#include -#include -#include + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +# ifdef _MSC_VER +# pragma warning(push) // Temporary until lexical cast fixed. +# pragma warning(disable: 4127 4701) +# endif +# include +# ifdef _MSC_VER +# pragma warning(pop) +# endif +#endif + #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -46,11 +50,21 @@ struct max_factorial; template struct unchecked_factorial_data; +#ifdef BOOST_MATH_HAS_NVRTC + +// Need fwd decl +template +BOOST_MATH_GPU_ENABLED inline T unchecked_factorial(unsigned i); + +#endif + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT + template struct unchecked_factorial_data { #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - static constexpr std::array factorials = { { + static constexpr boost::math::array factorials = { { 1.0F, 1.0F, 2.0F, @@ -88,15 +102,15 @@ struct unchecked_factorial_data 0.29523279903960414084761860964352e39F, }}; #else - static const std::array factorials; + static const boost::math::array factorials; #endif }; template #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - constexpr std::array unchecked_factorial_data::factorials; + constexpr boost::math::array unchecked_factorial_data::factorials; #else - const std::array unchecked_factorial_data::factorials = {{ + const boost::math::array unchecked_factorial_data::factorials = {{ 1.0F, 1.0F, 2.0F, @@ -137,22 +151,72 @@ template // Definitions: template <> -inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION float unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(float)) +BOOST_MATH_GPU_ENABLED inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION float unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(float)) { return unchecked_factorial_data::factorials[i]; } +#else + +template <> +BOOST_MATH_GPU_ENABLED inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION float unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(float)) +{ + constexpr float factorials[] = { + 1.0F, + 1.0F, + 2.0F, + 6.0F, + 24.0F, + 120.0F, + 720.0F, + 5040.0F, + 40320.0F, + 362880.0F, + 3628800.0F, + 39916800.0F, + 479001600.0F, + 6227020800.0F, + 87178291200.0F, + 1307674368000.0F, + 20922789888000.0F, + 355687428096000.0F, + 6402373705728000.0F, + 121645100408832000.0F, + 0.243290200817664e19F, + 0.5109094217170944e20F, + 0.112400072777760768e22F, + 0.2585201673888497664e23F, + 0.62044840173323943936e24F, + 0.15511210043330985984e26F, + 0.403291461126605635584e27F, + 0.10888869450418352160768e29F, + 0.304888344611713860501504e30F, + 0.8841761993739701954543616e31F, + 0.26525285981219105863630848e33F, + 0.822283865417792281772556288e34F, + 0.26313083693369353016721801216e36F, + 0.868331761881188649551819440128e37F, + 0.29523279903960414084761860964352e39F, + }; + + return factorials[i]; +} + +#endif + template <> struct max_factorial { static constexpr unsigned value = 34; }; +#ifndef BOOST_MATH_HAS_GPU_SUPPORT + template struct unchecked_factorial_data { #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - static constexpr std::array factorials = { { + static constexpr boost::math::array factorials = { { 1.0, 1.0, 2.0, @@ -326,15 +390,15 @@ struct unchecked_factorial_data 0.7257415615307998967396728211129263114717e307, }}; #else - static const std::array factorials; + static const boost::math::array factorials; #endif }; template #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - constexpr std::array 
unchecked_factorial_data::factorials; + constexpr boost::math::array unchecked_factorial_data::factorials; #else - const std::array unchecked_factorial_data::factorials = {{ + const boost::math::array unchecked_factorial_data::factorials = {{ 1.0, 1.0, 2.0, @@ -510,7 +574,7 @@ template #endif template <> -inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION double unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(double)) +BOOST_MATH_GPU_ENABLED inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION double unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(double)) { return unchecked_factorial_data::factorials[i]; } @@ -521,11 +585,67 @@ struct max_factorial static constexpr unsigned value = 170; }; +#else + +template <> +BOOST_MATH_GPU_ENABLED inline BOOST_MATH_CONSTEXPR_TABLE_FUNCTION double unchecked_factorial(unsigned i BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(double)) +{ + constexpr double factorials[] = { + 1, + 1, + 2, + 6, + 24, + 120, + 720, + 5040, + 40320, + 362880.0, + 3628800.0, + 39916800.0, + 479001600.0, + 6227020800.0, + 87178291200.0, + 1307674368000.0, + 20922789888000.0, + 355687428096000.0, + 6402373705728000.0, + 121645100408832000.0, + 0.243290200817664e19, + 0.5109094217170944e20, + 0.112400072777760768e22, + 0.2585201673888497664e23, + 0.62044840173323943936e24, + 0.15511210043330985984e26, + 0.403291461126605635584e27, + 0.10888869450418352160768e29, + 0.304888344611713860501504e30, + 0.8841761993739701954543616e31, + 0.26525285981219105863630848e33, + 0.822283865417792281772556288e34, + 0.26313083693369353016721801216e36, + 0.868331761881188649551819440128e37, + 0.29523279903960414084761860964352e39, + }; + + return factorials[i]; +} + +template <> +struct max_factorial +{ + static constexpr unsigned value = 34; +}; + +#endif + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT + template struct unchecked_factorial_data { #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - static constexpr std::array factorials = { { + static constexpr boost::math::array factorials = { { 1L, 1L, 2L, @@ -699,15 +819,15 @@ struct unchecked_factorial_data 0.7257415615307998967396728211129263114717e307L, }}; #else - static const std::array factorials; + static const boost::math::array factorials; #endif }; template #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - constexpr std::array unchecked_factorial_data::factorials; + constexpr boost::math::array unchecked_factorial_data::factorials; #else - const std::array unchecked_factorial_data::factorials = {{ + const boost::math::array unchecked_factorial_data::factorials = {{ 1L, 1L, 2L, @@ -900,7 +1020,7 @@ template struct unchecked_factorial_data { #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES - static constexpr std::array factorials = { { + static constexpr boost::math::array factorials = { { 1, 1, 2, @@ -1074,15 +1194,15 @@ struct unchecked_factorial_data 0.7257415615307998967396728211129263114717e307Q, } }; #else - static const std::array factorials; + static const boost::math::array factorials; #endif }; template #ifdef BOOST_MATH_HAVE_CONSTEXPR_TABLES -constexpr std::array unchecked_factorial_data::factorials; +constexpr boost::math::array unchecked_factorial_data::factorials; #else -const std::array unchecked_factorial_data::factorials = { { +const boost::math::array unchecked_factorial_data::factorials = { { 1, 1, 2, @@ -1294,7 +1414,7 @@ const typename unchecked_factorial_initializer::init unchecked_factorial_init template -inline T unchecked_factorial_imp(unsigned i, const std::integral_constant&) +inline T 
unchecked_factorial_imp(unsigned i, const boost::math::integral_constant&) { // // If you're foolish enough to instantiate factorial @@ -1308,10 +1428,10 @@ inline T unchecked_factorial_imp(unsigned i, const std::integral_constant(factorial(n)); // See factorial documentation for more detail. // - static_assert(!std::is_integral::value && !std::numeric_limits::is_integer, "Type T must not be an integral type"); + static_assert(!boost::math::is_integral::value && !boost::math::numeric_limits::is_integer, "Type T must not be an integral type"); // We rely on C++11 thread safe initialization here: - static const std::array factorials = {{ + static const boost::math::array factorials = {{ T(boost::math::tools::convert_from_string("1")), T(boost::math::tools::convert_from_string("1")), T(boost::math::tools::convert_from_string("2")), @@ -1419,7 +1539,7 @@ inline T unchecked_factorial_imp(unsigned i, const std::integral_constant -inline T unchecked_factorial_imp(unsigned i, const std::integral_constant&) +inline T unchecked_factorial_imp(unsigned i, const boost::math::integral_constant&) { // // If you're foolish enough to instantiate factorial @@ -1433,7 +1553,7 @@ inline T unchecked_factorial_imp(unsigned i, const std::integral_constant(factorial(n)); // See factorial documentation for more detail. // - static_assert(!std::is_integral::value && !std::numeric_limits::is_integer, "Type T must not be an integral type"); + static_assert(!boost::math::is_integral::value && !boost::math::numeric_limits::is_integer, "Type T must not be an integral type"); static const char* const factorial_strings[] = { "1", @@ -1556,42 +1676,48 @@ inline T unchecked_factorial_imp(unsigned i, const std::integral_constant -inline T unchecked_factorial_imp(unsigned i, const std::integral_constant::digits>&) +BOOST_MATH_GPU_ENABLED inline T unchecked_factorial_imp(unsigned i, const boost::math::integral_constant::digits>&) { return unchecked_factorial(i); } template -inline T unchecked_factorial_imp(unsigned i, const std::integral_constant::digits>&) +BOOST_MATH_GPU_ENABLED inline T unchecked_factorial_imp(unsigned i, const boost::math::integral_constant::digits>&) { return unchecked_factorial(i); } +#ifndef BOOST_MATH_HAS_GPU_SUPPORT + #if DBL_MANT_DIG != LDBL_MANT_DIG template -inline T unchecked_factorial_imp(unsigned i, const std::integral_constant&) +inline T unchecked_factorial_imp(unsigned i, const boost::math::integral_constant&) { return unchecked_factorial(i); } #endif #ifdef BOOST_MATH_USE_FLOAT128 template -inline T unchecked_factorial_imp(unsigned i, const std::integral_constant&) +inline T unchecked_factorial_imp(unsigned i, const boost::math::integral_constant&) { return unchecked_factorial(i); } #endif +#endif // BOOST_MATH_HAS_GPU_SUPPORT + template -inline T unchecked_factorial(unsigned i) +BOOST_MATH_GPU_ENABLED inline T unchecked_factorial(unsigned i) { typedef typename boost::math::policies::precision >::type tag_type; return unchecked_factorial_imp(i, tag_type()); } #ifdef BOOST_MATH_USE_FLOAT128 -#define BOOST_MATH_DETAIL_FLOAT128_MAX_FACTORIAL : std::numeric_limits::digits == 113 ? max_factorial::value +#define BOOST_MATH_DETAIL_FLOAT128_MAX_FACTORIAL : boost::math::numeric_limits::digits == 113 ? max_factorial::value #else #define BOOST_MATH_DETAIL_FLOAT128_MAX_FACTORIAL #endif @@ -1600,10 +1726,12 @@ template struct max_factorial { static constexpr unsigned value = - std::numeric_limits::digits == std::numeric_limits::digits ? 
max_factorial::value - : std::numeric_limits::digits == std::numeric_limits::digits ? max_factorial::value - : std::numeric_limits::digits == std::numeric_limits::digits ? max_factorial::value + boost::math::numeric_limits::digits == boost::math::numeric_limits::digits ? max_factorial::value + : boost::math::numeric_limits::digits == boost::math::numeric_limits::digits ? max_factorial::value + #ifndef BOOST_MATH_GPU_ENABLED + : boost::math::numeric_limits::digits == boost::math::numeric_limits::digits ? max_factorial::value BOOST_MATH_DETAIL_FLOAT128_MAX_FACTORIAL + #endif : 100; }; diff --git a/include/boost/math/special_functions/digamma.hpp b/include/boost/math/special_functions/digamma.hpp index 3922de7d25..382ad0d6b9 100644 --- a/include/boost/math/special_functions/digamma.hpp +++ b/include/boost/math/special_functions/digamma.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -12,13 +13,21 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) #endif -#include +#include +#include #include -#include #include +#include +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include +#include #include #include #include +#endif #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -37,11 +46,11 @@ namespace detail{ // Begin by defining the smallest value for which it is safe to // use the asymptotic expansion for digamma: // -inline unsigned digamma_large_lim(const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline unsigned digamma_large_lim(const boost::math::integral_constant*) { return 20; } -inline unsigned digamma_large_lim(const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline unsigned digamma_large_lim(const boost::math::integral_constant*) { return 20; } -inline unsigned digamma_large_lim(const void*) +BOOST_MATH_GPU_ENABLED inline unsigned digamma_large_lim(const void*) { return 10; } // // Implementations of the asymptotic expansion come next, @@ -53,8 +62,10 @@ inline unsigned digamma_large_lim(const void*) // // This first one gives 34-digit precision for x >= 20: // + +#ifndef BOOST_MATH_HAS_NVRTC template -inline T digamma_imp_large(T x, const std::integral_constant*) +inline T digamma_imp_large(T x, const boost::math::integral_constant*) { BOOST_MATH_STD_USING // ADL of std functions. static const T P[] = { @@ -87,7 +98,7 @@ inline T digamma_imp_large(T x, const std::integral_constant*) // 19-digit precision for x >= 10: // template -inline T digamma_imp_large(T x, const std::integral_constant*) +inline T digamma_imp_large(T x, const boost::math::integral_constant*) { BOOST_MATH_STD_USING // ADL of std functions. static const T P[] = { @@ -110,14 +121,15 @@ inline T digamma_imp_large(T x, const std::integral_constant*) result -= z * tools::evaluate_polynomial(P, z); return result; } +#endif // // 17-digit precision for x >= 10: // template -inline T digamma_imp_large(T x, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline T digamma_imp_large(T x, const boost::math::integral_constant*) { BOOST_MATH_STD_USING // ADL of std functions. 
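// [Reviewer note] The digamma_imp_large overloads implement the asymptotic
// expansion psi(x) ~ ln(x) - 1/(2x) - sum_{n>=1} B_{2n}/(2n * x^{2n}); the
// P[] tables that follow are 1/12, -1/120, 1/252, ... in powers of
// z = 1/x^2.  A self-contained three-term sketch of the same series
// (illustrative only, not the library routine):

#include <cmath>
#include <cstdio>

float digamma_large_sketch(float x)   // intended for x >= ~10
{
   const float z = 1 / (x * x);
   float result = std::log(x) - 0.5f / x;
   result -= z * (1.0f / 12 + z * (-1.0f / 120 + z * (1.0f / 252)));
   return result;
}

int main()
{
   std::printf("%.7g\n", digamma_large_sketch(10.0f)); // psi(10) ~ 2.2517526
}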
- static const T P[] = { + BOOST_MATH_STATIC const T P[] = { 0.083333333333333333333333333333333333333333333333333, -0.0083333333333333333333333333333333333333333333333333, 0.003968253968253968253968253968253968253968253968254, @@ -138,10 +150,10 @@ inline T digamma_imp_large(T x, const std::integral_constant*) // 9-digit precision for x >= 10: // template -inline T digamma_imp_large(T x, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline T digamma_imp_large(T x, const boost::math::integral_constant*) { BOOST_MATH_STD_USING // ADL of std functions. - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { 0.083333333333333333333333333333333333333333333333333f, -0.0083333333333333333333333333333333333333333333333333f, 0.003968253968253968253968253968253968253968253968254f @@ -153,6 +165,8 @@ inline T digamma_imp_large(T x, const std::integral_constant*) result -= z * tools::evaluate_polynomial(P, z); return result; } + +#ifndef BOOST_MATH_HAS_NVRTC // // Fully generic asymptotic expansion in terms of Bernoulli numbers, see: // http://functions.wolfram.com/06.14.06.0012.01 @@ -177,7 +191,7 @@ struct digamma_series_func }; template -inline T digamma_imp_large(T x, const Policy& pol, const std::integral_constant*) +inline T digamma_imp_large(T x, const Policy& pol, const boost::math::integral_constant*) { BOOST_MATH_STD_USING digamma_series_func s(x); @@ -194,7 +208,7 @@ inline T digamma_imp_large(T x, const Policy& pol, const std::integral_constant< // 35-digit precision: // template -T digamma_imp_1_2(T x, const std::integral_constant*) +T digamma_imp_1_2(T x, const boost::math::integral_constant*) { // // Now the approximation, we use the form: @@ -258,7 +272,7 @@ T digamma_imp_1_2(T x, const std::integral_constant*) // 19-digit precision: // template -T digamma_imp_1_2(T x, const std::integral_constant*) +T digamma_imp_1_2(T x, const boost::math::integral_constant*) { // // Now the approximation, we use the form: @@ -306,11 +320,13 @@ T digamma_imp_1_2(T x, const std::integral_constant*) return result; } + +#endif // // 18-digit precision: // template -T digamma_imp_1_2(T x, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED T digamma_imp_1_2(T x, const boost::math::integral_constant*) { // // Now the approximation, we use the form: @@ -325,13 +341,13 @@ T digamma_imp_1_2(T x, const std::integral_constant*) // At double precision, max error found: 2.452e-17 // // LCOV_EXCL_START - static const float Y = 0.99558162689208984F; + BOOST_MATH_STATIC const float Y = 0.99558162689208984F; - static const T root1 = T(1569415565) / 1073741824uL; - static const T root2 = (T(381566830) / 1073741824uL) / 1073741824uL; - static const T root3 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.9016312093258695918615325266959189453125e-19); + BOOST_MATH_STATIC const T root1 = T(1569415565) / 1073741824uL; + BOOST_MATH_STATIC const T root2 = (T(381566830) / 1073741824uL) / 1073741824uL; + BOOST_MATH_STATIC const T root3 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.9016312093258695918615325266959189453125e-19); - static const T P[] = { + BOOST_MATH_STATIC const T P[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 0.25479851061131551), BOOST_MATH_BIG_CONSTANT(T, 53, -0.32555031186804491), BOOST_MATH_BIG_CONSTANT(T, 53, -0.65031853770896507), @@ -339,7 +355,7 @@ T digamma_imp_1_2(T x, const std::integral_constant*) BOOST_MATH_BIG_CONSTANT(T, 53, -0.045251321448739056), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0020713321167745952) }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), 
BOOST_MATH_BIG_CONSTANT(T, 53, 2.0767117023730469), BOOST_MATH_BIG_CONSTANT(T, 53, 1.4606242909763515), @@ -361,7 +377,7 @@ T digamma_imp_1_2(T x, const std::integral_constant*) // 9-digit precision: // template -inline T digamma_imp_1_2(T x, const std::integral_constant*) +BOOST_MATH_GPU_ENABLED inline T digamma_imp_1_2(T x, const boost::math::integral_constant*) { // // Now the approximation, we use the form: @@ -376,16 +392,16 @@ inline T digamma_imp_1_2(T x, const std::integral_constant*) // At float precision, max error found: 2.008725e-008 // // LCOV_EXCL_START - static const float Y = 0.99558162689208984f; - static const T root = 1532632.0f / 1048576; - static const T root_minor = static_cast(0.3700660185912626595423257213284682051735604e-6L); - static const T P[] = { + BOOST_MATH_STATIC const float Y = 0.99558162689208984f; + BOOST_MATH_STATIC const T root = 1532632.0f / 1048576; + BOOST_MATH_STATIC const T root_minor = static_cast(0.3700660185912626595423257213284682051735604e-6L); + BOOST_MATH_STATIC const T P[] = { 0.25479851023250261e0f, -0.44981331915268368e0f, -0.43916936919946835e0f, -0.61041765350579073e-1f }; - static const T Q[] = { + BOOST_MATH_STATIC const T Q[] = { 0.1e1f, 0.15890202430554952e1f, 0.65341249856146947e0f, @@ -401,7 +417,7 @@ inline T digamma_imp_1_2(T x, const std::integral_constant*) } template -T digamma_imp(T x, const Tag* t, const Policy& pol) +BOOST_MATH_GPU_ENABLED T digamma_imp(T x, const Tag* t, const Policy& pol) { // // This handles reflection of negative arguments, and all our @@ -439,11 +455,13 @@ T digamma_imp(T x, const Tag* t, const Policy& pol) // If we're above the lower-limit for the // asymptotic expansion then use it: // + #ifndef BOOST_MATH_HAS_NVRTC if(x >= digamma_large_lim(t)) { result += digamma_imp_large(x, t); } else + #endif { // // If x > 2 reduce to the interval [1,2]: @@ -466,8 +484,10 @@ T digamma_imp(T x, const Tag* t, const Policy& pol) return result; } +#ifndef BOOST_MATH_HAS_NVRTC + template -T digamma_imp(T x, const std::integral_constant* t, const Policy& pol) +T digamma_imp(T x, const boost::math::integral_constant* t, const Policy& pol) { // // This handles reflection of negative arguments, and all our @@ -564,16 +584,18 @@ T digamma_imp(T x, const std::integral_constant* t, const Policy& pol) // LCOV_EXCL_STOP } +#endif + } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type digamma(T x, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; typedef typename policies::precision::type precision_type; - typedef std::integral_constant 113) ? 0 : precision_type::value <= 24 ? 24 : precision_type::value <= 53 ? 53 : @@ -592,7 +614,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type digamma(T x) { return digamma(x, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_1.hpp b/include/boost/math/special_functions/ellint_1.hpp index dfc1815f7f..96c7c9e9b9 100644 --- a/include/boost/math/special_functions/ellint_1.hpp +++ b/include/boost/math/special_functions/ellint_1.hpp @@ -1,5 +1,6 @@ // Copyright (c) 2006 Xiaogang Zhang // Copyright (c) 2006 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -18,6 +19,8 @@ #pragma once #endif +#include +#include #include #include #include @@ -31,28 +34,28 @@ namespace boost { namespace math { template -typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol); +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol); namespace detail{ template -T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, boost::math::integral_constant const&); template -T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, boost::math::integral_constant const&); template -T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, boost::math::integral_constant const&); template -T ellint_k_imp(T k, const Policy& pol, T one_minus_k2); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, T one_minus_k2); // Elliptic integral (Legendre form) of the first kind template -T ellint_f_imp(T phi, T k, const Policy& pol, T one_minus_k2) +BOOST_MATH_GPU_ENABLED T ellint_f_imp(T phi, T k, const Policy& pol, T one_minus_k2) { BOOST_MATH_STD_USING using namespace boost::math::tools; using namespace boost::math::constants; - static const char* function = "boost::math::ellint_f<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::ellint_f<%1%>(%1%,%1%)"; BOOST_MATH_INSTRUMENT_VARIABLE(phi); BOOST_MATH_INSTRUMENT_VARIABLE(k); BOOST_MATH_INSTRUMENT_VARIABLE(function); @@ -149,19 +152,19 @@ T ellint_f_imp(T phi, T k, const Policy& pol, T one_minus_k2) } template -inline T ellint_f_imp(T phi, T k, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T ellint_f_imp(T phi, T k, const Policy& pol) { return ellint_f_imp(phi, k, pol, T(1 - k * k)); } // Complete elliptic integral (Legendre form) of the first kind template -T ellint_k_imp(T k, const Policy& pol, T one_minus_k2) +BOOST_MATH_GPU_ENABLED T ellint_k_imp(T k, const Policy& pol, T one_minus_k2) { BOOST_MATH_STD_USING using namespace boost::math::tools; - static const char* function = "boost::math::ellint_k<%1%>(%1%)"; + constexpr auto function = "boost::math::ellint_k<%1%>(%1%)"; if (abs(k) > 1) { @@ -179,7 +182,7 @@ T ellint_k_imp(T k, const Policy& pol, T one_minus_k2) return value; } template -inline T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&) +BOOST_MATH_GPU_ENABLED inline T ellint_k_imp(T k, const Policy& pol, boost::math::integral_constant const&) { return ellint_k_imp(k, pol, T(1 - k * k)); } @@ -201,9 +204,9 @@ inline T ellint_k_imp(T k, const Policy& pol, std::integral_constant con // archived in the code below), but was found to have slightly higher error rates. 
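For reference, the m > 0.9 cases that the polynomial branches below hand off to are evaluated through Carlson's symmetric form, via the standard identity K(k) = R_F(0, 1 - k^2, 1). A minimal host-side sketch of that relation using the public API (the cross-check against C++17's std::comp_ellint_1 is purely illustrative):

    #include <boost/math/special_functions/ellint_rf.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        double k = 0.99; // m = k * k = 0.9801, above the 0.9 cutoff
        // Complete elliptic integral of the first kind in Carlson form:
        double K = boost::math::ellint_rf(0.0, 1.0 - k * k, 1.0);
        std::cout << K << " vs " << std::comp_ellint_1(k) << '\n';
    }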
// template -BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, boost::math::integral_constant const&) { - using std::abs; + BOOST_MATH_STD_USING using namespace boost::math::tools; T m = k * k; @@ -454,7 +457,7 @@ BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_cons // This handles all cases where m > 0.9, // including all error handling: // - return ellint_k_imp(k, pol, std::integral_constant()); + return ellint_k_imp(k, pol, boost::math::integral_constant()); #if 0 else { @@ -474,9 +477,9 @@ BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_cons } } template -BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_constant const&) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, boost::math::integral_constant const&) { - using std::abs; + BOOST_MATH_STD_USING using namespace boost::math::tools; T m = k * k; @@ -755,44 +758,37 @@ BOOST_MATH_FORCEINLINE T ellint_k_imp(T k, const Policy& pol, std::integral_cons // All cases where m > 0.9 // including all error handling: // - return ellint_k_imp(k, pol, std::integral_constant()); + return ellint_k_imp(k, pol, boost::math::integral_constant()); } } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T k, const Policy& pol, const std::true_type&) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_1(T k, const Policy& pol, const boost::math::true_type&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; - typedef std::integral_constant::value && std::numeric_limits::digits && (std::numeric_limits::digits <= 54) ? 0 : - std::is_floating_point::value && std::numeric_limits::digits && (std::numeric_limits::digits <= 64) ? 1 : 2 + boost::math::is_floating_point::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 54) ? 0 : + boost::math::is_floating_point::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 64) ? 1 : 2 #endif > precision_tag_type; return policies::checked_narrowing_cast(detail::ellint_k_imp(static_cast(k), pol, precision_tag_type()), "boost::math::ellint_1<%1%>(%1%)"); } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, T2 phi, const std::false_type&) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi, const boost::math::false_type&) { return boost::math::ellint_1(k, phi, policies::policy<>()); } -} - -// Complete elliptic integral (Legendre form) of the first kind -template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T k) -{ - return ellint_1(k, policies::policy<>()); -} +} // namespace detail // Elliptic integral (Legendre form) of the first kind template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol) // LCOV_EXCL_LINE gcc misses this but sees the function body, strange! +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi, const Policy& pol) // LCOV_EXCL_LINE gcc misses this but sees the function body, strange! 
{ typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -800,12 +796,19 @@ BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_1(T1 k, T2 phi) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_1(T1 k, T2 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_1(k, phi, tag_type()); } +// Complete elliptic integral (Legendre form) of the first kind +template +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_1(T k) +{ + return ellint_1(k, policies::policy<>()); +} + }} // namespaces #endif // BOOST_MATH_ELLINT_1_HPP diff --git a/include/boost/math/special_functions/ellint_2.hpp b/include/boost/math/special_functions/ellint_2.hpp index b09cdd490e..0cc1fa0944 100644 --- a/include/boost/math/special_functions/ellint_2.hpp +++ b/include/boost/math/special_functions/ellint_2.hpp @@ -1,5 +1,6 @@ // Copyright (c) 2006 Xiaogang Zhang // Copyright (c) 2006 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -18,6 +19,9 @@ #pragma once #endif +#include +#include +#include #include #include #include @@ -33,20 +37,20 @@ namespace boost { namespace math { template -typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol); +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol); namespace detail{ template -T ellint_e_imp(T k, const Policy& pol, const std::integral_constant&); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, const boost::math::integral_constant&); template -T ellint_e_imp(T k, const Policy& pol, const std::integral_constant&); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, const boost::math::integral_constant&); template -T ellint_e_imp(T k, const Policy& pol, const std::integral_constant&); +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, const boost::math::integral_constant&); // Elliptic integral (Legendre form) of the second kind template -T ellint_e_imp(T phi, T k, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_e_imp(T phi, T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -71,9 +75,9 @@ T ellint_e_imp(T phi, T k, const Policy& pol) } else if(phi > 1 / tools::epsilon()) { - typedef std::integral_constant::value&& std::numeric_limits::digits && (std::numeric_limits::digits <= 54) ? 0 : - std::is_floating_point::value && std::numeric_limits::digits && (std::numeric_limits::digits <= 64) ? 1 : 2 + typedef boost::math::integral_constant::value&& boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 54) ? 0 : + boost::math::is_floating_point::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 64) ? 1 : 2 > precision_tag_type; // Phi is so large that phi%pi is necessarily zero (or garbage), // just return the second part of the duplication formula: @@ -138,9 +142,9 @@ T ellint_e_imp(T phi, T k, const Policy& pol) } if (m != 0) { - typedef std::integral_constant::value&& std::numeric_limits::digits && (std::numeric_limits::digits <= 54) ? 
0 : - std::is_floating_point::value && std::numeric_limits::digits && (std::numeric_limits::digits <= 64) ? 1 : 2 + typedef boost::math::integral_constant::value&& boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 54) ? 0 : + boost::math::is_floating_point::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 64) ? 1 : 2 > precision_tag_type; result += m * ellint_e_imp(k, pol, precision_tag_type()); } @@ -150,7 +154,7 @@ T ellint_e_imp(T phi, T k, const Policy& pol) // Complete elliptic integral (Legendre form) of the second kind template -T ellint_e_imp(T k, const Policy& pol, std::integral_constant const&) +BOOST_MATH_GPU_ENABLED T ellint_e_imp(T k, const Policy& pol, boost::math::integral_constant const&) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -188,9 +192,9 @@ T ellint_e_imp(T k, const Policy& pol, std::integral_constant const&) // existing routines. // template -BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, std::integral_constant const&) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, boost::math::integral_constant const&) { - using std::abs; + BOOST_MATH_STD_USING using namespace boost::math::tools; T m = k * k; @@ -423,13 +427,13 @@ BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, std::integral_cons // All cases where m > 0.9 // including all error handling: // - return ellint_e_imp(k, pol, std::integral_constant()); + return ellint_e_imp(k, pol, boost::math::integral_constant()); } } template -BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, std::integral_constant const&) +BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, boost::math::integral_constant const&) { - using std::abs; + BOOST_MATH_STD_USING using namespace boost::math::tools; T m = k * k; @@ -696,54 +700,56 @@ BOOST_MATH_FORCEINLINE T ellint_e_imp(T k, const Policy& pol, std::integral_cons // All cases where m > 0.9 // including all error handling: // - return ellint_e_imp(k, pol, std::integral_constant()); + return ellint_e_imp(k, pol, boost::math::integral_constant()); } } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T k, const Policy& pol, const std::true_type&) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_2(T k, const Policy& pol, const boost::math::true_type&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; - typedef std::integral_constant::value&& std::numeric_limits::digits && (std::numeric_limits::digits <= 54) ? 0 : - std::is_floating_point::value && std::numeric_limits::digits && (std::numeric_limits::digits <= 64) ? 1 : 2 + typedef boost::math::integral_constant::value&& boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 54) ? 0 : + boost::math::is_floating_point::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 64) ? 
1 : 2 > precision_tag_type; return policies::checked_narrowing_cast(detail::ellint_e_imp(static_cast(k), pol, precision_tag_type()), "boost::math::ellint_2<%1%>(%1%)"); } // Elliptic integral (Legendre form) of the second kind template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T1 k, T2 phi, const std::false_type&) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi, const boost::math::false_type&) { return boost::math::ellint_2(k, phi, policies::policy<>()); } } // detail -// Complete elliptic integral (Legendre form) of the second kind -template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T k) -{ - return ellint_2(k, policies::policy<>()); -} - // Elliptic integral (Legendre form) of the second kind template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T1 k, T2 phi) +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_2(k, phi, tag_type()); } template -BOOST_MATH_FORCEINLINE typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol) // LCOV_EXCL_LINE gcc misses this but sees the function body, strange! +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_2(T1 k, T2 phi, const Policy& pol) // LCOV_EXCL_LINE gcc misses this but sees the function body, strange! { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; return policies::checked_narrowing_cast(detail::ellint_e_imp(static_cast(phi), static_cast(k), pol), "boost::math::ellint_2<%1%>(%1%,%1%)"); } + +// Complete elliptic integral (Legendre form) of the second kind +template +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_2(T k) +{ + return ellint_2(k, policies::policy<>()); +} + + }} // namespaces #endif // BOOST_MATH_ELLINT_2_HPP diff --git a/include/boost/math/special_functions/ellint_3.hpp b/include/boost/math/special_functions/ellint_3.hpp index 33acc545dc..b8df7e2645 100644 --- a/include/boost/math/special_functions/ellint_3.hpp +++ b/include/boost/math/special_functions/ellint_3.hpp @@ -18,6 +18,8 @@ #pragma once #endif +#include +#include #include #include #include @@ -38,16 +40,16 @@ namespace boost { namespace math { namespace detail{ template -T ellint_pi_imp(T v, T k, T vc, const Policy& pol); +BOOST_MATH_CUDA_ENABLED T ellint_pi_imp(T v, T k, T vc, const Policy& pol); // Elliptic integral (Legendre form) of the third kind template -T ellint_pi_imp(T v, T phi, T k, T vc, const Policy& pol) +BOOST_MATH_CUDA_ENABLED T ellint_pi_imp(T v, T phi, T k, T vc, const Policy& pol) { // Note vc = 1-v presumably without cancellation error. 
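A small illustration (the values are hypothetical, chosen only to show the failure mode) of why vc is taken as a separate parameter rather than recomputed: when the caller derives v = 1 - t from some tiny t, forming 1 - v from the rounded v cancels away every digit of t, whereas passing vc = t directly keeps full precision:

    #include <cstdio>

    int main()
    {
        double t  = 1e-18;   // small quantity known exactly to the caller
        double v  = 1.0 - t; // rounds to exactly 1.0 in double precision
        double vc = 1.0 - v; // == 0.0: all information about t is lost
        std::printf("recomputed 1 - v = %g, exact vc = %g\n", vc, t);
    }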
BOOST_MATH_STD_USING - static const char* function = "boost::math::ellint_3<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ellint_3<%1%>(%1%,%1%,%1%)"; T sphi = sin(fabs(phi)); @@ -270,13 +272,13 @@ T ellint_pi_imp(T v, T phi, T k, T vc, const Policy& pol) // Complete elliptic integral (Legendre form) of the third kind template -T ellint_pi_imp(T v, T k, T vc, const Policy& pol) +BOOST_MATH_CUDA_ENABLED T ellint_pi_imp(T v, T k, T vc, const Policy& pol) { // Note arg vc = 1-v, possibly without cancellation errors BOOST_MATH_STD_USING using namespace boost::math::tools; - static const char* function = "boost::math::ellint_pi<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::ellint_pi<%1%>(%1%,%1%)"; if (abs(k) >= 1) { @@ -318,13 +320,13 @@ T ellint_pi_imp(T v, T k, T vc, const Policy& pol) } template -inline typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const std::false_type&) +BOOST_MATH_CUDA_ENABLED inline typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const boost::math::false_type&) { return boost::math::ellint_3(k, v, phi, policies::policy<>()); } template -inline typename tools::promote_args::type ellint_3(T1 k, T2 v, const Policy& pol, const std::true_type&) +BOOST_MATH_CUDA_ENABLED inline typename tools::promote_args::type ellint_3(T1 k, T2 v, const Policy& pol, const boost::math::true_type&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -339,7 +341,7 @@ inline typename tools::promote_args::type ellint_3(T1 k, T2 v, const Pol } // namespace detail template -inline typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const Policy&) +BOOST_MATH_CUDA_ENABLED inline typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 phi, const Policy&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -354,14 +356,14 @@ inline typename tools::promote_args::type ellint_3(T1 k, T2 v, T3 ph } template -typename detail::ellint_3_result::type ellint_3(T1 k, T2 v, T3 phi) +BOOST_MATH_CUDA_ENABLED typename detail::ellint_3_result::type ellint_3(T1 k, T2 v, T3 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_3(k, v, phi, tag_type()); } template -inline typename tools::promote_args::type ellint_3(T1 k, T2 v) +BOOST_MATH_CUDA_ENABLED inline typename tools::promote_args::type ellint_3(T1 k, T2 v) { return ellint_3(k, v, policies::policy<>()); } diff --git a/include/boost/math/special_functions/ellint_d.hpp b/include/boost/math/special_functions/ellint_d.hpp index da1e87ba3e..f5a8491f5a 100644 --- a/include/boost/math/special_functions/ellint_d.hpp +++ b/include/boost/math/special_functions/ellint_d.hpp @@ -1,5 +1,6 @@ // Copyright (c) 2006 Xiaogang Zhang // Copyright (c) 2006 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -18,6 +19,8 @@ #pragma once #endif +#include +#include #include #include #include @@ -33,16 +36,16 @@ namespace boost { namespace math { template -typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol); +BOOST_MATH_GPU_ENABLED typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol); namespace detail{ template -T ellint_d_imp(T k, const Policy& pol); +BOOST_MATH_GPU_ENABLED T ellint_d_imp(T k, const Policy& pol); // Elliptic integral (Legendre form) of the second kind template -T ellint_d_imp(T phi, T k, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_d_imp(T phi, T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -113,7 +116,7 @@ T ellint_d_imp(T phi, T k, const Policy& pol) // Complete elliptic integral (Legendre form) of the second kind template -T ellint_d_imp(T k, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_d_imp(T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -135,7 +138,7 @@ T ellint_d_imp(T k, const Policy& pol) } template -inline typename tools::promote_args::type ellint_d(T k, const Policy& pol, const std::true_type&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_d(T k, const Policy& pol, const boost::math::true_type&) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -144,7 +147,7 @@ inline typename tools::promote_args::type ellint_d(T k, const Policy& pol, co // Elliptic integral (Legendre form) of the second kind template -inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const std::false_type&) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const boost::math::false_type&) { return boost::math::ellint_d(k, phi, policies::policy<>()); } @@ -153,21 +156,21 @@ inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const s // Complete elliptic integral (Legendre form) of the second kind template -inline typename tools::promote_args::type ellint_d(T k) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_d(T k) { return ellint_d(k, policies::policy<>()); } // Elliptic integral (Legendre form) of the second kind template -inline typename tools::promote_args::type ellint_d(T1 k, T2 phi) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_d(T1 k, T2 phi) { typedef typename policies::is_policy::type tag_type; return detail::ellint_d(k, phi, tag_type()); } template -inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_d(T1 k, T2 phi, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; diff --git a/include/boost/math/special_functions/ellint_rc.hpp b/include/boost/math/special_functions/ellint_rc.hpp index 2f9a1f8cfb..ae3c6375e5 100644 --- a/include/boost/math/special_functions/ellint_rc.hpp +++ b/include/boost/math/special_functions/ellint_rc.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2006 Xiaogang Zhang, 2015 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -18,12 +19,11 @@ #pragma once #endif -#include #include +#include #include #include #include -#include // Carlson's degenerate elliptic integral // R_C(x, y) = R_F(x, y, y) = 0.5 * \int_{0}^{\infty} (t+x)^{-1/2} (t+y)^{-1} dt @@ -32,11 +32,11 @@ namespace boost { namespace math { namespace detail{ template -T ellint_rc_imp(T x, T y, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_rc_imp(T x, T y, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::ellint_rc<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::ellint_rc<%1%>(%1%,%1%)"; if(x < 0) { @@ -88,7 +88,7 @@ T ellint_rc_imp(T x, T y, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rc(T1 x, T2 y, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -100,7 +100,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rc(T1 x, T2 y) { return ellint_rc(x, y, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_rd.hpp b/include/boost/math/special_functions/ellint_rd.hpp index 2a79e54ca2..f2a33adc46 100644 --- a/include/boost/math/special_functions/ellint_rd.hpp +++ b/include/boost/math/special_functions/ellint_rd.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2006 Xiaogang Zhang, 2015 John Maddock. +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -16,10 +17,10 @@ #pragma once #endif +#include +#include #include #include -#include -#include #include // Carlson's elliptic integral of the second kind @@ -29,12 +30,11 @@ namespace boost { namespace math { namespace detail{ template -T ellint_rd_imp(T x, T y, T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_rd_imp(T x, T y, T z, const Policy& pol) { BOOST_MATH_STD_USING - using std::swap; - static const char* function = "boost::math::ellint_rd<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ellint_rd<%1%>(%1%,%1%,%1%)"; if(x < 0) { @@ -55,9 +55,11 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) // // Special cases from http://dlmf.nist.gov/19.20#iv // - using std::swap; + if(x == z) - swap(x, y); + { + BOOST_MATH_GPU_SAFE_SWAP(x, y); + } if(y == z) { if(x == y) @@ -70,19 +72,21 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) } else { - if((std::max)(x, y) / (std::min)(x, y) > T(1.3)) + if(BOOST_MATH_GPU_SAFE_MAX(x, y) / BOOST_MATH_GPU_SAFE_MIN(x, y) > T(1.3)) return 3 * (ellint_rc_imp(x, y, pol) - sqrt(x) / y) / (2 * (y - x)); // Otherwise fall through to avoid cancellation in the above (RC(x,y) -> 1/x^0.5 as x -> y) } } if(x == y) { - if((std::max)(x, z) / (std::min)(x, z) > T(1.3)) + if(BOOST_MATH_GPU_SAFE_MAX(x, z) / BOOST_MATH_GPU_SAFE_MIN(x, z) > T(1.3)) return 3 * (ellint_rc_imp(z, x, pol) - 1 / sqrt(z)) / (z - x); // Otherwise fall through to avoid cancellation in the above (RC(x,y) -> 1/x^0.5 as x -> y) } if(y == 0) - swap(x, y); + { + BOOST_MATH_GPU_SAFE_SWAP(x, y); + } if(x == 0) { // @@ -102,7 +106,8 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) xn = (xn + yn) / 2; yn = t; sum_pow *= 2; - sum += sum_pow * boost::math::pow<2>(xn - yn); + const auto temp = (xn - 
yn); + sum += sum_pow * temp * temp; } T RF = constants::pi() / (xn + yn); // @@ -128,7 +133,7 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) T An = (x + y + 3 * z) / 5; T A0 = An; // This has an extra 1.2 fudge factor which is really only needed when x, y and z are close in magnitude: - T Q = pow(tools::epsilon() / 4, -T(1) / 8) * (std::max)((std::max)(An - x, An - y), An - z) * 1.2f; + T Q = pow(tools::epsilon() / 4, -T(1) / 8) * BOOST_MATH_GPU_SAFE_MAX(BOOST_MATH_GPU_SAFE_MAX(An - x, An - y), An - z) * 1.2f; BOOST_MATH_INSTRUMENT_VARIABLE(Q); T lambda, rx, ry, rz; unsigned k = 0; @@ -177,7 +182,7 @@ T ellint_rd_imp(T x, T y, T z, const Policy& pol) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rd(T1 x, T2 y, T3 z, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -190,7 +195,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rd(T1 x, T2 y, T3 z) { return ellint_rd(x, y, z, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_rf.hpp b/include/boost/math/special_functions/ellint_rf.hpp index c781ac0353..eb1c2b6e71 100644 --- a/include/boost/math/special_functions/ellint_rf.hpp +++ b/include/boost/math/special_functions/ellint_rf.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2006 Xiaogang Zhang, 2015 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -17,8 +18,9 @@ #pragma once #endif -#include #include +#include +#include #include #include #include @@ -30,21 +32,20 @@ namespace boost { namespace math { namespace detail{ template - T ellint_rf_imp(T x, T y, T z, const Policy& pol) + BOOST_MATH_GPU_ENABLED T ellint_rf_imp(T x, T y, T z, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math; - using std::swap; - static const char* function = "boost::math::ellint_rf<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ellint_rf<%1%>(%1%,%1%,%1%)"; if(x < 0 || y < 0 || z < 0) { - return policies::raise_domain_error(function, "domain error, all arguments must be non-negative, only sensible result is %1%.", std::numeric_limits::quiet_NaN(), pol); + return policies::raise_domain_error(function, "domain error, all arguments must be non-negative, only sensible result is %1%.", boost::math::numeric_limits::quiet_NaN(), pol); } if(x + y == 0 || y + z == 0 || z + x == 0) { - return policies::raise_domain_error(function, "domain error, at most one argument can be zero, only sensible result is %1%.", std::numeric_limits::quiet_NaN(), pol); + return policies::raise_domain_error(function, "domain error, at most one argument can be zero, only sensible result is %1%.", boost::math::numeric_limits::quiet_NaN(), pol); } // // Special cases from http://dlmf.nist.gov/19.20#i @@ -80,9 +81,9 @@ namespace boost { namespace math { namespace detail{ return ellint_rc_imp(x, y, pol); } if(x == 0) - swap(x, z); + BOOST_MATH_GPU_SAFE_SWAP(x, z); else if(y == 0) - swap(y, z); + BOOST_MATH_GPU_SAFE_SWAP(y, z); if(z == 0) { // @@ -105,7 +106,7 @@ namespace boost { namespace math { namespace detail{ T zn = z; T An = (x + y + z) / 3; T A0 = An; - T Q = pow(3 * boost::math::tools::epsilon(), T(-1) / 8) * (std::max)((std::max)(fabs(An - xn), 
fabs(An - yn)), fabs(An - zn)); + T Q = pow(3 * boost::math::tools::epsilon(), T(-1) / 8) * BOOST_MATH_GPU_SAFE_MAX(BOOST_MATH_GPU_SAFE_MAX(fabs(An - xn), fabs(An - yn)), fabs(An - zn)); T fn = 1; @@ -143,7 +144,7 @@ namespace boost { namespace math { namespace detail{ } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rf(T1 x, T2 y, T3 z, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -156,7 +157,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rf(T1 x, T2 y, T3 z) { return ellint_rf(x, y, z, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_rg.hpp b/include/boost/math/special_functions/ellint_rg.hpp index 051c104bca..8a7f706ac0 100644 --- a/include/boost/math/special_functions/ellint_rg.hpp +++ b/include/boost/math/special_functions/ellint_rg.hpp @@ -10,8 +10,8 @@ #pragma once #endif -#include #include +#include #include #include #include @@ -21,27 +21,26 @@ namespace boost { namespace math { namespace detail{ template - T ellint_rg_imp(T x, T y, T z, const Policy& pol) + BOOST_MATH_GPU_ENABLED T ellint_rg_imp(T x, T y, T z, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::ellint_rf<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ellint_rf<%1%>(%1%,%1%,%1%)"; if(x < 0 || y < 0 || z < 0) { - return policies::raise_domain_error(function, "domain error, all arguments must be non-negative, only sensible result is %1%.", std::numeric_limits::quiet_NaN(), pol); + return policies::raise_domain_error(function, "domain error, all arguments must be non-negative, only sensible result is %1%.", boost::math::numeric_limits::quiet_NaN(), pol); } // // Function is symmetric in x, y and z, but we require // (x - z)(y - z) >= 0 to avoid cancellation error in the result // which implies (for example) x >= z >= y // - using std::swap; if(x < y) - swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(x, y); if(x < z) - swap(x, z); + BOOST_MATH_GPU_SAFE_SWAP(x, z); if(y > z) - swap(y, z); + BOOST_MATH_GPU_SAFE_SWAP(y, z); BOOST_MATH_ASSERT(x >= z); BOOST_MATH_ASSERT(z >= y); @@ -64,7 +63,7 @@ namespace boost { namespace math { namespace detail{ else { // x = z, y != 0 - swap(x, y); + BOOST_MATH_GPU_SAFE_SWAP(x, y); return (x == 0) ? 
T(sqrt(z) / 2) : T((z * ellint_rc_imp(x, z, pol) + sqrt(x)) / 2); } } @@ -75,7 +74,7 @@ namespace boost { namespace math { namespace detail{ } else if(y == 0) { - swap(y, z); + BOOST_MATH_GPU_SAFE_SWAP(y, z); // // Special handling for common case, from // Numerical Computation of Real or Complex Elliptic Integrals, eq.46 @@ -106,7 +105,7 @@ namespace boost { namespace math { namespace detail{ } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rg(T1 x, T2 y, T3 z, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -119,7 +118,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rg(T1 x, T2 y, T3 z) { return ellint_rg(x, y, z, policies::policy<>()); diff --git a/include/boost/math/special_functions/ellint_rj.hpp b/include/boost/math/special_functions/ellint_rj.hpp index f19eac2843..76e1a14eb4 100644 --- a/include/boost/math/special_functions/ellint_rj.hpp +++ b/include/boost/math/special_functions/ellint_rj.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2006 Xiaogang Zhang, 2015 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -18,8 +19,9 @@ #pragma once #endif -#include #include +#include +#include #include #include #include @@ -32,7 +34,7 @@ namespace boost { namespace math { namespace detail{ template -T ellint_rc1p_imp(T y, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_rc1p_imp(T y, const Policy& pol) { using namespace boost::math; // Calculate RC(1, 1 + x) @@ -70,11 +72,11 @@ T ellint_rc1p_imp(T y, const Policy& pol) } template -T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) +BOOST_MATH_GPU_ENABLED T ellint_rj_imp_final(T x, T y, T z, T p, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::ellint_rj<%1%>(%1%,%1%,%1%)"; + constexpr auto function = "boost::math::ellint_rj<%1%>(%1%,%1%,%1%)"; if(x < 0) { @@ -94,37 +96,7 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) } if(x + y == 0 || y + z == 0 || z + x == 0) { - return policies::raise_domain_error(function, "At most one argument can be zero, only possible result is %1%.", std::numeric_limits::quiet_NaN(), pol); - } - - // for p < 0, the integral is singular, return Cauchy principal value - if(p < 0) - { - // - // We must ensure that x < y < z. 
- // Since the integral is symmetrical in x, y and z - // we can just permute the values: - // - if(x > y) - std::swap(x, y); - if(y > z) - std::swap(y, z); - if(x > y) - std::swap(x, y); - - BOOST_MATH_ASSERT(x <= y); - BOOST_MATH_ASSERT(y <= z); - - T q = -p; - p = (z * (x + y + q) - x * y) / (z + q); - - BOOST_MATH_ASSERT(p >= 0); - - T value = (p - z) * ellint_rj_imp(x, y, z, p, pol); - value -= 3 * ellint_rf_imp(x, y, z, pol); - value += 3 * sqrt((x * y * z) / (x * y + p * q)) * ellint_rc_imp(T(x * y + p * q), T(p * q), pol); - value /= (z + q); - return value; + return policies::raise_domain_error(function, "At most one argument can be zero, only possible result is %1%.", boost::math::numeric_limits::quiet_NaN(), pol); } // @@ -148,13 +120,12 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) else { // x = y only, permute so y = z: - using std::swap; - swap(x, z); + BOOST_MATH_GPU_SAFE_SWAP(x, z); if(y == p) { return ellint_rd_imp(x, y, y, pol); } - else if((std::max)(y, p) / (std::min)(y, p) > T(1.2)) + else if(BOOST_MATH_GPU_SAFE_MAX(y, p) / BOOST_MATH_GPU_SAFE_MIN(y, p) > T(1.2)) { return 3 * (ellint_rc_imp(x, y, pol) - ellint_rc_imp(x, p, pol)) / (p - y); } @@ -168,7 +139,7 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) // y = z = p: return ellint_rd_imp(x, y, y, pol); } - else if((std::max)(y, p) / (std::min)(y, p) > T(1.2)) + else if(BOOST_MATH_GPU_SAFE_MAX(y, p) / BOOST_MATH_GPU_SAFE_MIN(y, p) > T(1.2)) { // y = z: return 3 * (ellint_rc_imp(x, y, pol) - ellint_rc_imp(x, p, pol)) / (p - y); @@ -187,7 +158,7 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) T An = (x + y + z + 2 * p) / 5; T A0 = An; T delta = (p - x) * (p - y) * (p - z); - T Q = pow(tools::epsilon() / 5, -T(1) / 8) * (std::max)((std::max)(fabs(An - x), fabs(An - y)), (std::max)(fabs(An - z), fabs(An - p))); + T Q = pow(tools::epsilon() / 5, -T(1) / 8) * BOOST_MATH_GPU_SAFE_MAX(BOOST_MATH_GPU_SAFE_MAX(fabs(An - x), fabs(An - y)), BOOST_MATH_GPU_SAFE_MAX(fabs(An - z), fabs(An - p))); unsigned n; T lambda; @@ -260,10 +231,71 @@ T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) return result; } +template +BOOST_MATH_GPU_ENABLED T ellint_rj_imp(T x, T y, T z, T p, const Policy& pol) +{ + BOOST_MATH_STD_USING + + constexpr auto function = "boost::math::ellint_rj<%1%>(%1%,%1%,%1%)"; + + if(x < 0) + { + return policies::raise_domain_error(function, "Argument x must be non-negative, but got x = %1%", x, pol); + } + if(y < 0) + { + return policies::raise_domain_error(function, "Argument y must be non-negative, but got y = %1%", y, pol); + } + if(z < 0) + { + return policies::raise_domain_error(function, "Argument z must be non-negative, but got z = %1%", z, pol); + } + if(p == 0) + { + return policies::raise_domain_error(function, "Argument p must not be zero, but got p = %1%", p, pol); + } + if(x + y == 0 || y + z == 0 || z + x == 0) + { + return policies::raise_domain_error(function, "At most one argument can be zero, only possible result is %1%.", boost::math::numeric_limits::quiet_NaN(), pol); + } + + // for p < 0, the integral is singular, return Cauchy principal value + if(p < 0) + { + // + // We must ensure that x < y < z. 
+ // Since the integral is symmetrical in x, y and z + // we can just permute the values: + // + if(x > y) + BOOST_MATH_GPU_SAFE_SWAP(x, y); + if(y > z) + BOOST_MATH_GPU_SAFE_SWAP(y, z); + if(x > y) + BOOST_MATH_GPU_SAFE_SWAP(x, y); + + BOOST_MATH_ASSERT(x <= y); + BOOST_MATH_ASSERT(y <= z); + + T q = -p; + p = (z * (x + y + q) - x * y) / (z + q); + + BOOST_MATH_ASSERT(p >= 0); + + T value = (p - z) * ellint_rj_imp_final(x, y, z, p, pol); + value -= 3 * ellint_rf_imp(x, y, z, pol); + value += 3 * sqrt((x * y * z) / (x * y + p * q)) * ellint_rc_imp(T(x * y + p * q), T(p * q), pol); + value /= (z + q); + return value; + } + + return ellint_rj_imp_final(x, y, z, p, pol); +} + } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rj(T1 x, T2 y, T3 z, T4 p, const Policy& pol) { typedef typename tools::promote_args::type result_type; @@ -278,7 +310,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type ellint_rj(T1 x, T2 y, T3 z, T4 p) { return ellint_rj(x, y, z, p, policies::policy<>()); diff --git a/include/boost/math/special_functions/erf.hpp b/include/boost/math/special_functions/erf.hpp index 57ff605299..9f0da9282f 100644 --- a/include/boost/math/special_functions/erf.hpp +++ b/include/boost/math/special_functions/erf.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,8 +11,11 @@ #pragma once #endif -#include #include + +#ifndef BOOST_MATH_HAS_NVRTC + +#include #include #include #include @@ -39,7 +43,7 @@ template struct erf_asympt_series_t { // LCOV_EXCL_START multiprecision case only, excluded from coverage analysis - erf_asympt_series_t(T z) : xx(2 * -z * z), tk(1) + BOOST_MATH_GPU_ENABLED erf_asympt_series_t(T z) : xx(2 * -z * z), tk(1) { BOOST_MATH_STD_USING result = -exp(-z * z) / sqrt(boost::math::constants::pi()); @@ -48,7 +52,7 @@ struct erf_asympt_series_t typedef T result_type; - T operator()() + BOOST_MATH_GPU_ENABLED T operator()() { BOOST_MATH_STD_USING T r = result; @@ -68,33 +72,33 @@ struct erf_asympt_series_t // How large z has to be in order to ensure that the series converges: // template -inline float erf_asymptotic_limit_N(const T&) +BOOST_MATH_GPU_ENABLED inline float erf_asymptotic_limit_N(const T&) { return (std::numeric_limits::max)(); } -inline float erf_asymptotic_limit_N(const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline float erf_asymptotic_limit_N(const std::integral_constant&) { return 2.8F; } -inline float erf_asymptotic_limit_N(const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline float erf_asymptotic_limit_N(const std::integral_constant&) { return 4.3F; } -inline float erf_asymptotic_limit_N(const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline float erf_asymptotic_limit_N(const std::integral_constant&) { return 4.8F; } -inline float erf_asymptotic_limit_N(const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline float erf_asymptotic_limit_N(const std::integral_constant&) { return 6.5F; } -inline float erf_asymptotic_limit_N(const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline float erf_asymptotic_limit_N(const std::integral_constant&) { return 6.8F; } template -inline T 
erf_asymptotic_limit() +BOOST_MATH_GPU_ENABLED inline T erf_asymptotic_limit() { typedef typename policies::precision::type precision_type; typedef std::integral_constant -T erf_imp(T z, bool invert, const Policy& pol, const std::integral_constant& t) +BOOST_MATH_GPU_ENABLED T erf_imp(T z, bool invert, const Policy& pol, const std::integral_constant&) { BOOST_MATH_STD_USING @@ -207,14 +211,30 @@ T erf_imp(T z, bool invert, const Policy& pol, const std::integral_constant(%1%)", "Expected a finite argument but got %1%", z, pol); + int prefix_multiplier = 1; + int prefix_adder = 0; + if(z < 0) { + // Recursion is logically simpler here, but confuses static analyzers that need to be + // able to calculate the maximum program stack size at compile time (i.e. CUDA). + z = -z; if(!invert) - return -erf_imp(T(-z), invert, pol, t); + { + prefix_multiplier = -1; + // return -erf_imp(T(-z), invert, pol, t); + } else if(z < T(-0.5)) - return 2 - erf_imp(T(-z), invert, pol, t); + { + prefix_adder = 2; + // return 2 - erf_imp(T(-z), invert, pol, t); + } else - return 1 + erf_imp(T(-z), false, pol, t); + { + invert = false; + prefix_adder = 1; + // return 1 + erf_imp(T(-z), false, pol, t); + } } T result; @@ -237,7 +257,7 @@ T erf_imp(T z, bool invert, const Policy& pol, const std::integral_constant(z * 1.125f + z * c); } } @@ -248,15 +268,15 @@ T erf_imp(T z, bool invert, const Policy& pol, const std::integral_constantT erf_imp(T z, bool invert, const Lanczos& l, const std::integral_constant& t) @@ -1175,7 +1196,7 @@ T erf_imp(T z, bool invert, const Policy& pol, const std::integral_constant -inline typename tools::promote_args::type erf(T z, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type erf(T z, const Policy& /* pol */) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -1208,7 +1229,7 @@ inline typename tools::promote_args::type erf(T z, const Policy& /* pol */) } template -inline typename tools::promote_args::type erfc(T z, const Policy& /* pol */) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type erfc(T z, const Policy& /* pol */) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -1241,13 +1262,13 @@ inline typename tools::promote_args::type erfc(T z, const Policy& /* pol */) } template -inline typename tools::promote_args::type erf(T z) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type erf(T z) { return boost::math::erf(z, policies::policy<>()); } template -inline typename tools::promote_args::type erfc(T z) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type erfc(T z) { return boost::math::erfc(z, policies::policy<>()); } @@ -1255,6 +1276,64 @@ inline typename tools::promote_args::type erfc(T z) } // namespace math } // namespace boost +#else // Special handling for NVRTC platform + +namespace boost { +namespace math { + +template +BOOST_MATH_GPU_ENABLED auto erf(T x) +{ + return ::erf(x); +} + +template <> +BOOST_MATH_GPU_ENABLED auto erf(float x) +{ + return ::erff(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erf(T x, const Policy&) +{ + return ::erf(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erf(float x, const Policy&) +{ + return ::erff(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erfc(T x) +{ + return ::erfc(x); +} + +template <> +BOOST_MATH_GPU_ENABLED auto erfc(float x) +{ + return ::erfcf(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erfc(T x, const
Policy&) +{ + return ::erfc(x); +} + +template +BOOST_MATH_GPU_ENABLED auto erfc(float x, const Policy&) +{ + return ::erfcf(x); +} + +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_HAS_NVRTC + #include #endif // BOOST_MATH_SPECIAL_ERF_HPP diff --git a/include/boost/math/special_functions/expint.hpp b/include/boost/math/special_functions/expint.hpp index 1475a9a88b..09e97bd4fc 100644 --- a/include/boost/math/special_functions/expint.hpp +++ b/include/boost/math/special_functions/expint.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2007. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -12,6 +13,10 @@ #pragma warning(disable:4702) // Unreachable code (release mode only warning) #endif +#include +#include +#include +#include #include #include #include @@ -20,7 +25,6 @@ #include #include #include -#include #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -35,13 +39,13 @@ namespace boost{ namespace math{ template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type expint(unsigned n, T z, const Policy& /*pol*/); namespace detail{ template -inline T expint_1_rational(const T& z, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline T expint_1_rational(const T& z, const boost::math::integral_constant&) { // this function is never actually called BOOST_MATH_ASSERT(0); @@ -49,7 +53,7 @@ inline T expint_1_rational(const T& z, const std::integral_constant&) } template -T expint_1_rational(const T& z, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T expint_1_rational(const T& z, const boost::math::integral_constant&) { BOOST_MATH_STD_USING T result; @@ -123,7 +127,7 @@ T expint_1_rational(const T& z, const std::integral_constant&) } template -T expint_1_rational(const T& z, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T expint_1_rational(const T& z, const boost::math::integral_constant&) { BOOST_MATH_STD_USING T result; @@ -204,7 +208,7 @@ T expint_1_rational(const T& z, const std::integral_constant&) } template -T expint_1_rational(const T& z, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED T expint_1_rational(const T& z, const boost::math::integral_constant&) { BOOST_MATH_STD_USING T result; @@ -351,14 +355,15 @@ T expint_1_rational(const T& z, const std::integral_constant&) return result; } + template struct expint_fraction { - typedef std::pair result_type; - expint_fraction(unsigned n_, T z_) : b(n_ + z_), i(-1), n(n_){} - std::pair operator()() + typedef boost::math::pair result_type; + BOOST_MATH_GPU_ENABLED expint_fraction(unsigned n_, T z_) : b(n_ + z_), i(-1), n(n_){} + BOOST_MATH_GPU_ENABLED boost::math::pair operator()() { - std::pair result = std::make_pair(-static_cast((i+1) * (n+i)), b); + boost::math::pair result = boost::math::make_pair(-static_cast((i+1) * (n+i)), b); b += 2; ++i; return result; @@ -370,11 +375,11 @@ struct expint_fraction }; template -inline T expint_as_fraction(unsigned n, T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T expint_as_fraction(unsigned n, T z, const Policy& pol) { BOOST_MATH_STD_USING BOOST_MATH_INSTRUMENT_VARIABLE(z) - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); expint_fraction f(n, z); T result = tools::continued_fraction_b( f, @@ -392,9 +397,9 @@ 
template struct expint_series { typedef T result_type; - expint_series(unsigned k_, T z_, T x_k_, T denom_, T fact_) + BOOST_MATH_GPU_ENABLED expint_series(unsigned k_, T z_, T x_k_, T denom_, T fact_) : k(k_), z(z_), x_k(x_k_), denom(denom_), fact(fact_){} - T operator()() + BOOST_MATH_GPU_ENABLED T operator()() { x_k *= -z; denom += 1; @@ -410,10 +415,10 @@ struct expint_series }; template -inline T expint_as_series(unsigned n, T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T expint_as_series(unsigned n, T z, const Policy& pol) { BOOST_MATH_STD_USING - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); BOOST_MATH_INSTRUMENT_VARIABLE(z) @@ -443,10 +448,10 @@ inline T expint_as_series(unsigned n, T z, const Policy& pol) } template -T expint_imp(unsigned n, T z, const Policy& pol, const Tag& tag) +BOOST_MATH_GPU_ENABLED T expint_imp(unsigned n, T z, const Policy& pol, const Tag& tag) { BOOST_MATH_STD_USING - static const char* function = "boost::math::expint<%1%>(unsigned, %1%)"; + constexpr auto function = "boost::math::expint<%1%>(unsigned, %1%)"; if(z < 0) return policies::raise_domain_error(function, "Function requires z >= 0 but got %1%.", z, pol); if(z == 0) @@ -468,15 +473,21 @@ T expint_imp(unsigned n, T z, const Policy& pol, const Tag& tag) # pragma warning(disable:4127) // conditional expression is constant #endif if(n == 0) + { result = exp(-z) / z; + } else if((n == 1) && (Tag::value)) { result = expint_1_rational(z, tag); } else if(f) + { result = expint_as_series(n, z, pol); + } else + { result = expint_as_fraction(n, z, pol); + } #ifdef _MSC_VER # pragma warning(pop) #endif @@ -488,8 +499,8 @@ template struct expint_i_series { typedef T result_type; - expint_i_series(T z_) : k(0), z_k(1), z(z_){} - T operator()() + BOOST_MATH_GPU_ENABLED expint_i_series(T z_) : k(0), z_k(1), z(z_){} + BOOST_MATH_GPU_ENABLED T operator()() { z_k *= z / ++k; return z_k / k; @@ -501,22 +512,22 @@ struct expint_i_series }; template -T expint_i_as_series(T z, const Policy& pol) +BOOST_MATH_GPU_ENABLED T expint_i_as_series(T z, const Policy& pol) { BOOST_MATH_STD_USING T result = log(z); // (log(z) - log(1 / z)) / 2; result += constants::euler(); expint_i_series s(z); - std::uintmax_t max_iter = policies::get_max_series_iterations(); + boost::math::uintmax_t max_iter = policies::get_max_series_iterations(); result = tools::sum_series(s, policies::get_epsilon(), max_iter, result); policies::check_series_iterations("boost::math::expint_i_series<%1%>(%1%)", max_iter, pol); return result; } template -T expint_i_imp(T z, const Policy& pol, const Tag& tag) +BOOST_MATH_GPU_ENABLED T expint_i_imp(T z, const Policy& pol, const Tag& tag) { - static const char* function = "boost::math::expint<%1%>(%1%)"; + constexpr auto function = "boost::math::expint<%1%>(%1%)"; if(z < 0) return -expint_imp(1, T(-z), pol, tag); if(z == 0) @@ -525,10 +536,10 @@ T expint_i_imp(T z, const Policy& pol, const Tag& tag) } template -T expint_i_imp(T z, const Policy& pol, const std::integral_constant& tag) +BOOST_MATH_GPU_ENABLED T expint_i_imp(T z, const Policy& pol, const boost::math::integral_constant& tag) { BOOST_MATH_STD_USING - static const char* function = "boost::math::expint<%1%>(%1%)"; + constexpr auto function = "boost::math::expint<%1%>(%1%)"; if(z < 0) return -expint_imp(1, T(-z), pol, tag); if(z == 0) @@ -541,7 +552,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta // Maximum Deviation Found: 
2.852e-18 // Expected Error Term: 2.852e-18 // Max Error found at double precision = Poly: 2.636335e-16 Cheb: 4.187027e-16 - static const T P[10] = { + BOOST_MATH_STATIC const T P[10] = { BOOST_MATH_BIG_CONSTANT(T, 53, 2.98677224343598593013), BOOST_MATH_BIG_CONSTANT(T, 53, 0.356343618769377415068), BOOST_MATH_BIG_CONSTANT(T, 53, 0.780836076283730801839), @@ -553,7 +564,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta BOOST_MATH_BIG_CONSTANT(T, 53, 0.798296365679269702435e-5), BOOST_MATH_BIG_CONSTANT(T, 53, 0.2777056254402008721e-6) }; - static const T Q[8] = { + BOOST_MATH_STATIC const T Q[8] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, -1.17090412365413911947), BOOST_MATH_BIG_CONSTANT(T, 53, 0.62215109846016746276), @@ -564,11 +575,11 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta BOOST_MATH_BIG_CONSTANT(T, 53, -0.138972589601781706598e-4) }; - static const T c1 = BOOST_MATH_BIG_CONSTANT(T, 53, 1677624236387711.0); - static const T c2 = BOOST_MATH_BIG_CONSTANT(T, 53, 4503599627370496.0); - static const T r1 = static_cast(c1 / c2); - static const T r2 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.131401834143860282009280387409357165515556574352422001206362e-16); - static const T r = static_cast(BOOST_MATH_BIG_CONSTANT(T, 53, 0.372507410781366634461991866580119133535689497771654051555657435242200120636201854384926049951548942392)); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T c1 = BOOST_MATH_BIG_CONSTANT(T, 53, 1677624236387711.0); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T c2 = BOOST_MATH_BIG_CONSTANT(T, 53, 4503599627370496.0); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T r1 = static_cast(c1 / c2); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T r2 = BOOST_MATH_BIG_CONSTANT(T, 53, 0.131401834143860282009280387409357165515556574352422001206362e-16); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T r = static_cast(BOOST_MATH_BIG_CONSTANT(T, 53, 0.372507410781366634461991866580119133535689497771654051555657435242200120636201854384926049951548942392)); T t = (z / 3) - 1; result = tools::evaluate_polynomial(P, t) / tools::evaluate_polynomial(Q, t); @@ -588,8 +599,8 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta // Maximum Deviation Found: 6.546e-17 // Expected Error Term: 6.546e-17 // Max Error found at double precision = Poly: 6.890169e-17 Cheb: 6.772128e-17 - static const T Y = 1.158985137939453125F; - static const T P[8] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const T Y = 1.158985137939453125F; + BOOST_MATH_STATIC const T P[8] = { BOOST_MATH_BIG_CONSTANT(T, 53, 0.00139324086199402804173), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0349921221823888744966), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0264095520754134848538), @@ -599,7 +610,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta BOOST_MATH_BIG_CONSTANT(T, 53, -0.554086272024881826253e-4), BOOST_MATH_BIG_CONSTANT(T, 53, -0.396487648924804510056e-5) }; - static const T Q[8] = { + BOOST_MATH_STATIC const T Q[8] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 0.744625566823272107711), BOOST_MATH_BIG_CONSTANT(T, 53, 0.329061095011767059236), @@ -621,8 +632,8 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta // Expected Error Term: -1.842e-17 // Max Error found at double precision = Poly: 4.375868e-17 Cheb: 5.860967e-17 - static const T Y = 1.0869731903076171875F; - static const T P[9] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const T Y = 1.0869731903076171875F; + BOOST_MATH_STATIC const T P[9] = { 
BOOST_MATH_BIG_CONSTANT(T, 53, -0.00893891094356945667451), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0484607730127134045806), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0652810444222236895772), @@ -633,7 +644,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta BOOST_MATH_BIG_CONSTANT(T, 53, -0.000209750022660200888349), BOOST_MATH_BIG_CONSTANT(T, 53, -0.138652200349182596186e-4) }; - static const T Q[9] = { + BOOST_MATH_STATIC const T Q[9] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 1.97017214039061194971), BOOST_MATH_BIG_CONSTANT(T, 53, 1.86232465043073157508), @@ -657,8 +668,8 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta // Max Error found at double precision = Poly: 1.441088e-16 Cheb: 1.864792e-16 - static const T Y = 1.03937530517578125F; - static const T P[9] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const T Y = 1.03937530517578125F; + BOOST_MATH_STATIC const T P[9] = { BOOST_MATH_BIG_CONSTANT(T, 53, -0.00356165148914447597995), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0229930320357982333406), BOOST_MATH_BIG_CONSTANT(T, 53, -0.0449814350482277917716), @@ -669,7 +680,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta BOOST_MATH_BIG_CONSTANT(T, 53, -0.000192178045857733706044), BOOST_MATH_BIG_CONSTANT(T, 53, -0.113161784705911400295e-9) }; - static const T Q[9] = { + BOOST_MATH_STATIC const T Q[9] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 2.84354408840148561131), BOOST_MATH_BIG_CONSTANT(T, 53, 3.6599610090072393012), @@ -688,9 +699,9 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta else { // Max Error found at double precision = 3.381886e-17 - static const T exp40 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 53, 2.35385266837019985407899910749034804508871617254555467236651e17)); - static const T Y= 1.013065338134765625F; - static const T P[6] = { + BOOST_MATH_STATIC_LOCAL_VARIABLE const T exp40 = static_cast(BOOST_MATH_BIG_CONSTANT(T, 53, 2.35385266837019985407899910749034804508871617254555467236651e17)); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T Y= 1.013065338134765625F; + BOOST_MATH_STATIC const T P[6] = { BOOST_MATH_BIG_CONSTANT(T, 53, -0.0130653381347656243849), BOOST_MATH_BIG_CONSTANT(T, 53, 0.19029710559486576682), BOOST_MATH_BIG_CONSTANT(T, 53, 94.7365094537197236011), @@ -698,7 +709,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta BOOST_MATH_BIG_CONSTANT(T, 53, 18932.0850014925993025), BOOST_MATH_BIG_CONSTANT(T, 53, -38703.1431362056714134) }; - static const T Q[7] = { + BOOST_MATH_STATIC const T Q[7] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 61.9733592849439884145), BOOST_MATH_BIG_CONSTANT(T, 53, -2354.56211323420194283), @@ -739,10 +750,10 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta } template -T expint_i_imp(T z, const Policy& pol, const std::integral_constant& tag) +BOOST_MATH_GPU_ENABLED T expint_i_imp(T z, const Policy& pol, const boost::math::integral_constant& tag) { BOOST_MATH_STD_USING - static const char* function = "boost::math::expint<%1%>(%1%)"; + constexpr auto function = "boost::math::expint<%1%>(%1%)"; if(z < 0) return -expint_imp(1, T(-z), pol, tag); if(z == 0) @@ -976,7 +987,7 @@ T expint_i_imp(T z, const Policy& pol, const std::integral_constant& ta } template -void expint_i_imp_113a(T& result, const T& z, const Policy& pol) +BOOST_MATH_GPU_ENABLED void expint_i_imp_113a(T& result, const T& z, const Policy& pol) { BOOST_MATH_STD_USING // 
   // Maximum Deviation Found:  1.230e-36
@@ -1044,7 +1055,7 @@ void expint_i_imp_113a(T& result, const T& z, const Policy& pol)
}

template <class T>
-void expint_i_113b(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113b(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  7.779e-36
@@ -1094,7 +1105,7 @@ void expint_i_113b(T& result, const T& z)
}

template <class T>
-void expint_i_113c(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113c(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  1.082e-34
@@ -1147,7 +1158,7 @@ void expint_i_113c(T& result, const T& z)
}

template <class T>
-void expint_i_113d(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113d(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  3.163e-35
@@ -1198,7 +1209,7 @@ void expint_i_113d(T& result, const T& z)
}

template <class T>
-void expint_i_113e(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113e(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  7.972e-36
@@ -1252,7 +1263,7 @@ void expint_i_113e(T& result, const T& z)
}

template <class T>
-void expint_i_113f(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113f(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  4.469e-36
@@ -1299,7 +1310,7 @@ void expint_i_113f(T& result, const T& z)
}

template <class T>
-void expint_i_113g(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113g(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  5.588e-35
@@ -1344,7 +1355,7 @@ void expint_i_113g(T& result, const T& z)
}

template <class T>
-void expint_i_113h(T& result, const T& z)
+BOOST_MATH_GPU_ENABLED void expint_i_113h(T& result, const T& z)
{
   BOOST_MATH_STD_USING
   // Maximum Deviation Found:  4.448e-36
@@ -1383,10 +1394,10 @@ void expint_i_113h(T& result, const T& z)
}

template <class T, class Policy>
-T expint_i_imp(T z, const Policy& pol, const std::integral_constant<int, 113>& tag)
+BOOST_MATH_GPU_ENABLED T expint_i_imp(T z, const Policy& pol, const boost::math::integral_constant<int, 113>& tag)
{
   BOOST_MATH_STD_USING
-   static const char* function = "boost::math::expint<%1%>(%1%)";
+   constexpr auto function = "boost::math::expint<%1%>(%1%)";
   if(z < 0)
      return -expint_imp(1, T(-z), pol, tag);
   if(z == 0)
@@ -1491,12 +1502,12 @@ struct expint_i_initializer
{
   struct init
   {
-      init()
+      BOOST_MATH_GPU_ENABLED init()
      {
         do_init(tag());
      }
-      static void do_init(const std::integral_constant<int, 0>&){}
-      static void do_init(const std::integral_constant<int, 53>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 0>&){}
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 53>&)
      {
         boost::math::expint(T(5), Policy());
         boost::math::expint(T(7), Policy());
@@ -1504,7 +1515,7 @@ struct expint_i_initializer
         boost::math::expint(T(38), Policy());
         boost::math::expint(T(45), Policy());
      }
-      static void do_init(const std::integral_constant<int, 64>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 64>&)
      {
         boost::math::expint(T(5), Policy());
         boost::math::expint(T(7), Policy());
@@ -1512,7 +1523,7 @@ struct expint_i_initializer
         boost::math::expint(T(38), Policy());
         boost::math::expint(T(45), Policy());
      }
-      static void do_init(const std::integral_constant<int, 113>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 113>&)
      {
         boost::math::expint(T(5), Policy());
         boost::math::expint(T(7), Policy());
@@ -1524,12 +1535,14 @@ struct expint_i_initializer
         boost::math::expint(T(200), Policy());
         boost::math::expint(T(220), Policy());
      }
-      void force_instantiate()const{}
+      BOOST_MATH_GPU_ENABLED void force_instantiate()const{}
   };
   static const init initializer;
-   static void force_instantiate()
+   BOOST_MATH_GPU_ENABLED static void force_instantiate()
   {
+      #ifndef BOOST_MATH_HAS_GPU_SUPPORT
      initializer.force_instantiate();
+      #endif
   }
};
@@ -1541,33 +1554,35 @@ struct expint_1_initializer
{
   struct init
   {
-      init()
+      BOOST_MATH_GPU_ENABLED init()
      {
         do_init(tag());
      }
-      static void do_init(const std::integral_constant<int, 0>&){}
-      static void do_init(const std::integral_constant<int, 53>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 0>&){}
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 53>&)
      {
         boost::math::expint(1, T(0.5), Policy());
         boost::math::expint(1, T(2), Policy());
      }
-      static void do_init(const std::integral_constant<int, 64>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 64>&)
      {
         boost::math::expint(1, T(0.5), Policy());
         boost::math::expint(1, T(2), Policy());
      }
-      static void do_init(const std::integral_constant<int, 113>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 113>&)
      {
         boost::math::expint(1, T(0.5), Policy());
         boost::math::expint(1, T(2), Policy());
         boost::math::expint(1, T(6), Policy());
      }
-      void force_instantiate()const{}
+      BOOST_MATH_GPU_ENABLED void force_instantiate()const{}
   };
   static const init initializer;
-   static void force_instantiate()
+   BOOST_MATH_GPU_ENABLED static void force_instantiate()
   {
+      #ifndef BOOST_MATH_HAS_GPU_SUPPORT
      initializer.force_instantiate();
+      #endif
   }
};
@@ -1575,8 +1590,8 @@ template
const typename expint_1_initializer<T, Policy>::init expint_1_initializer<T, Policy>::initializer;

template <class T, class Policy>
-inline typename tools::promote_args<T>::type
-   expint_forwarder(T z, const Policy& /*pol*/, std::true_type const&)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
+   expint_forwarder(T z, const Policy& /*pol*/, boost::math::true_type const&)
{
   typedef typename tools::promote_args<T>::type result_type;
   typedef typename policies::evaluation<result_type, Policy>::type value_type;
@@ -1587,7 +1602,7 @@ inline typename tools::promote_args::type
      policies::promote_double<false>,
      policies::discrete_quantile<>,
      policies::assert_undefined<> >::type forwarding_policy;
-   typedef std::integral_constant<int, ...> tag_type;
+   typedef boost::math::integral_constant<int, ...> tag_type;
...
}

template <class T>
-inline typename tools::promote_args<T>::type
-expint_forwarder(unsigned n, T z, const std::false_type&)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
+expint_forwarder(unsigned n, T z, const boost::math::false_type&)
{
   return boost::math::expint(n, z, policies::policy<>());
}
@@ -1612,7 +1627,7 @@ expint_forwarder(unsigned n, T z, const std::false_type&)
} // namespace detail

template <class T, class Policy>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
   expint(unsigned n, T z, const Policy& /*pol*/)
{
   typedef typename tools::promote_args<T>::type result_type;
@@ -1624,7 +1639,7 @@ inline typename tools::promote_args::type
      policies::promote_double<false>,
      policies::discrete_quantile<>,
      policies::assert_undefined<> >::type forwarding_policy;
-   typedef std::integral_constant<int, ...> tag_type;
+   typedef boost::math::integral_constant<int, ...> tag_type;
...
}

template <class T, class U>
-inline typename detail::expint_result<T, U>::type
+BOOST_MATH_GPU_ENABLED inline typename detail::expint_result<T, U>::type
   expint(T const z, U const u)
{
   typedef typename policies::is_policy<U>::type tag_type;
@@ -1649,7 +1664,7 @@ inline typename detail::expint_result::type
}

template <class T>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
   expint(T z)
{
   return expint(z, policies::policy<>());
}
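For orientation, a minimal host-side usage sketch of the two expint overload families that the hunks above decorate with BOOST_MATH_GPU_ENABLED (the API is Boost.Math's public one; the argument values are illustrative):

    // Illustrative only: exercises both expint families touched by this diff.
    // boost::math::expint(z) computes Ei(z); expint(n, z) computes E_n(z).
    #include <boost/math/special_functions/expint.hpp>
    #include <iostream>

    int main()
    {
       double ei = boost::math::expint(1.5);    // one-argument family: Ei(1.5)
       double e1 = boost::math::expint(1, 1.5); // two-argument family: E_1(1.5)
       std::cout << ei << ' ' << e1 << '\n';
    }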
diff --git a/include/boost/math/special_functions/expm1.hpp b/include/boost/math/special_functions/expm1.hpp
index eec6356031..5e61ca20b0 100644
--- a/include/boost/math/special_functions/expm1.hpp
+++ b/include/boost/math/special_functions/expm1.hpp
@@ -1,4 +1,5 @@
//  (C) Copyright John Maddock 2006.
+//  (C) Copyright Matt Borland 2024.
//  Use, modification and distribution are subject to the
//  Boost Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,10 +11,10 @@
#pragma once
#endif

-#include <...>
-#include <...>
-#include <...>
#include <...>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
#include <...>
#include <...>
@@ -21,6 +22,9 @@
#include <...>
#include <...>
#include <...>
+#include <...>
+#include <...>
+#include <...>

#if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128)
//
@@ -45,10 +49,10 @@ namespace detail
{
   typedef T result_type;

-   expm1_series(T x)
+   BOOST_MATH_GPU_ENABLED expm1_series(T x)
      : k(0), m_x(x), m_term(1) {}

-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
   {
      ++k;
      m_term *= m_x;
@@ -56,7 +60,7 @@ namespace detail
      return m_term;
   }

-   int count()const
+   BOOST_MATH_GPU_ENABLED int count()const
   {
      return k;
   }
@@ -74,26 +78,28 @@ struct expm1_initializer
{
   struct init
   {
-      init()
+      BOOST_MATH_GPU_ENABLED init()
      {
         do_init(tag());
      }
      template <int b>
-      static void do_init(const std::integral_constant<int, b>&){}
-      static void do_init(const std::integral_constant<int, 53>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, b>&){}
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 53>&)
      {
         expm1(T(0.5));
      }
-      static void do_init(const std::integral_constant<int, 64>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 64>&)
      {
         expm1(T(0.5));
      }
-      void force_instantiate()const{}
+      BOOST_MATH_GPU_ENABLED void force_instantiate()const{}
   };
-   static const init initializer;
-   static void force_instantiate()
+   BOOST_MATH_STATIC const init initializer;
+   BOOST_MATH_GPU_ENABLED static void force_instantiate()
   {
+      #ifndef BOOST_MATH_HAS_GPU_SUPPORT
      initializer.force_instantiate();
+      #endif
   }
};
@@ -106,7 +112,7 @@ const typename expm1_initializer::init expm1_initializer
... |x| > epsilon.
//
template <class T, class Policy>
-T expm1_imp(T x, const std::integral_constant<int, 0>&, const Policy& pol)
+T expm1_imp(T x, const boost::math::integral_constant<int, 0>&, const Policy& pol)
{
   BOOST_MATH_STD_USING
@@ -128,7 +134,7 @@ T expm1_imp(T x, const std::integral_constant&, const Policy& pol)
   if(a < tools::epsilon<T>())
      return x;
   detail::expm1_series<T> s(x);
-   std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+   boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();

   T result = tools::sum_series(s, policies::get_epsilon<T, Policy>(), max_iter);
@@ -137,7 +143,7 @@ T expm1_imp(T x, const std::integral_constant&, const Policy& pol)
}

template <class T, class P>
-T expm1_imp(T x, const std::integral_constant<int, 53>&, const P& pol)
+BOOST_MATH_GPU_ENABLED T expm1_imp(T x, const boost::math::integral_constant<int, 53>&, const P& pol)
{
   BOOST_MATH_STD_USING
@@ -155,16 +161,16 @@ T expm1_imp(T x, const std::integral_constant&, const P& pol)
   if(a < tools::epsilon<T>())
      return x;

-   static const float Y = 0.10281276702880859e1f;
-   static const T n[] = { static_cast<T>(-0.28127670288085937e-1), static_cast<T>(0.51278186299064534e0), static_cast<T>(-0.6310029069350198e-1), static_cast<T>(0.11638457975729296e-1), static_cast<T>(-0.52143390687521003e-3), static_cast<T>(0.21491399776965688e-4) };
-   static const T d[] = { 1, static_cast<T>(-0.45442309511354755e0), static_cast<T>(0.90850389570911714e-1), static_cast<T>(-0.10088963629815502e-1), static_cast<T>(0.63003407478692265e-3), static_cast<T>(-0.17976570003654402e-4) };
+   BOOST_MATH_STATIC const float Y = 0.10281276702880859e1f;
+   BOOST_MATH_STATIC const T n[] = { static_cast<T>(-0.28127670288085937e-1), static_cast<T>(0.51278186299064534e0), static_cast<T>(-0.6310029069350198e-1), static_cast<T>(0.11638457975729296e-1), static_cast<T>(-0.52143390687521003e-3), static_cast<T>(0.21491399776965688e-4) };
+   BOOST_MATH_STATIC const T d[] = { 1, static_cast<T>(-0.45442309511354755e0), static_cast<T>(0.90850389570911714e-1), static_cast<T>(-0.10088963629815502e-1), static_cast<T>(0.63003407478692265e-3), static_cast<T>(-0.17976570003654402e-4) };

   T result = x * Y + x * tools::evaluate_polynomial(n, x) / tools::evaluate_polynomial(d, x);
   return result;
}

template <class T, class P>
-T expm1_imp(T x, const std::integral_constant<int, 64>&, const P& pol)
+BOOST_MATH_GPU_ENABLED T expm1_imp(T x, const boost::math::integral_constant<int, 64>&, const P& pol)
{
   BOOST_MATH_STD_USING
@@ -182,8 +188,8 @@ T expm1_imp(T x, const std::integral_constant&, const P& pol)
   if(a < tools::epsilon<T>())
      return x;

-   static const float Y = 0.10281276702880859375e1f;
-   static const T n[] = {
+   BOOST_MATH_STATIC const float Y = 0.10281276702880859375e1f;
+   BOOST_MATH_STATIC const T n[] = {
      BOOST_MATH_BIG_CONSTANT(T, 64, -0.281276702880859375e-1),
      BOOST_MATH_BIG_CONSTANT(T, 64, 0.512980290285154286358e0),
      BOOST_MATH_BIG_CONSTANT(T, 64, -0.667758794592881019644e-1),
@@ -192,7 +198,7 @@ T expm1_imp(T x, const std::integral_constant&, const P& pol)
      BOOST_MATH_BIG_CONSTANT(T, 64, 0.447441185192951335042e-4),
      BOOST_MATH_BIG_CONSTANT(T, 64, -0.714539134024984593011e-6)
   };
-   static const T d[] = {
+   BOOST_MATH_STATIC const T d[] = {
      BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
      BOOST_MATH_BIG_CONSTANT(T, 64, -0.461477618025562520389e0),
      BOOST_MATH_BIG_CONSTANT(T, 64, 0.961237488025708540713e-1),
@@ -207,7 +213,7 @@ T expm1_imp(T x, const std::integral_constant&, const P& pol)
}

template <class T, class P>
-T expm1_imp(T x, const std::integral_constant<int, 113>&, const P& pol)
+BOOST_MATH_GPU_ENABLED T expm1_imp(T x, const boost::math::integral_constant<int, 113>&, const P& pol)
{
   BOOST_MATH_STD_USING
@@ -259,7 +265,7 @@ T expm1_imp(T x, const std::integral_constant&, const P& pol)
}
} // namespace detail

template <class T, class Policy>
-inline typename tools::promote_args<T>::type expm1(T x, const Policy& /* pol */)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type expm1(T x, const Policy& /* pol */)
{
   typedef typename tools::promote_args<T>::type result_type;
   typedef typename policies::evaluation<result_type, Policy>::type value_type;
@@ -271,7 +277,7 @@ inline typename tools::promote_args::type expm1(T x, const Policy& /* pol */)
      policies::discrete_quantile<>,
      policies::assert_undefined<> >::type forwarding_policy;

-   typedef std::integral_constant<int, ...> tag_type;
+   typedef boost::math::integral_constant<int, ...> tag_type;
...
@@ ... @@ inline typename tools::promote_args::type expm1(T x, const Policy& /* pol */)
#if defined(BOOST_HAS_EXPM1) && !(defined(__osf__) && defined(__DECCXX_VER))
#  ifdef BOOST_MATH_USE_C99
-inline float expm1(float x, const policies::policy<>&){ return ::expm1f(x); }
+BOOST_MATH_GPU_ENABLED inline float expm1(float x, const policies::policy<>&){ return ::expm1f(x); }
#  ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
inline long double expm1(long double x, const policies::policy<>&){ return ::expm1l(x); }
#  endif
#  else
inline float expm1(float x, const policies::policy<>&){ return static_cast<float>(::expm1(x)); }
#  endif
-inline double expm1(double x, const policies::policy<>&){ return ::expm1(x); }
+BOOST_MATH_GPU_ENABLED inline double expm1(double x, const policies::policy<>&){ return ::expm1(x); }
#endif

template <class T>
-inline typename tools::promote_args<T>::type expm1(T x)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type expm1(T x)
{
   return expm1(x, policies::policy<>());
}
@@ -313,6 +319,40 @@ inline typename tools::promote_args::type expm1(T x)
} // namespace math
} // namespace boost

+#else // Special handling for NVRTC
+
+namespace boost {
+namespace math {
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED auto expm1(T x)
+{
+   return ::expm1(x);
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED auto expm1(float x)
+{
+   return ::expm1f(x);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED auto expm1(T x, const Policy&)
+{
+   return ::expm1(x);
+}
+
+template <typename Policy>
+BOOST_MATH_GPU_ENABLED auto expm1(float x, const Policy&)
+{
+   return ::expm1f(x);
+}
+
+} // Namespace math
+} // Namespace boost
+
+#endif // BOOST_MATH_HAS_NVRTC
+
#endif // BOOST_MATH_HYPOT_INCLUDED
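The NVRTC branch above bypasses the policy machinery entirely and forwards to the CUDA math library, with a float specialization so single-precision arguments call ::expm1f rather than promoting to double. A reduced sketch of the pattern under those assumptions (device_expm1 is a hypothetical name, not part of the diff):

    // Hypothetical reduction of the NVRTC forwarding pattern used above: the
    // generic template calls the double-precision CUDA routine, while a full
    // specialization keeps float arguments in single precision via ::expm1f.
    template <typename T>
    __device__ T device_expm1(T x) { return ::expm1(x); }

    template <>
    __device__ float device_expm1<float>(float x) { return ::expm1f(x); }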
diff --git a/include/boost/math/special_functions/factorials.hpp b/include/boost/math/special_functions/factorials.hpp
index 7229635cb9..ec6978bdc5 100644
--- a/include/boost/math/special_functions/factorials.hpp
+++ b/include/boost/math/special_functions/factorials.hpp
@@ -10,10 +10,14 @@
#pragma once
#endif

-#include <...>
+#include <...>
+#include <...>
+#include <...>
+#include <...>
#include <...>
#include <...>
-#include <...>
+#include <...>
+
#ifdef _MSC_VER
#pragma warning(push) // Temporary until lexical cast fixed.
#pragma warning(disable: 4127 4701)
@@ -21,16 +25,14 @@
#ifdef _MSC_VER
#pragma warning(pop)
#endif
-#include <...>
-#include <...>

namespace boost { namespace math {

template <class T, class Policy>
-inline T factorial(unsigned i, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T factorial(unsigned i, const Policy& pol)
{
-   static_assert(!std::is_integral<T>::value, "Type T must not be an integral type");
+   static_assert(!boost::math::is_integral<T>::value, "Type T must not be an integral type");
   // factorial<unsigned int>(n) is not implemented
   // because it would overflow integral type T for too small n
   // to be useful. Use instead a floating-point type,
@@ -49,7 +51,7 @@ inline T factorial(unsigned i, const Policy& pol)
}

template <class T>
-inline T factorial(unsigned i)
+BOOST_MATH_GPU_ENABLED inline T factorial(unsigned i)
{
   return factorial<T>(i, policies::policy<>());
}
@@ -72,9 +74,9 @@ inline double factorial(unsigned i)
}
*/
template <class T, class Policy>
-T double_factorial(unsigned i, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T double_factorial(unsigned i, const Policy& pol)
{
-   static_assert(!std::is_integral<T>::value, "Type T must not be an integral type");
+   static_assert(!boost::math::is_integral<T>::value, "Type T must not be an integral type");
   BOOST_MATH_STD_USING // ADL lookup of std names
   if(i & 1)
   {
@@ -107,17 +109,20 @@ T double_factorial(unsigned i, const Policy& pol)
}

template <class T>
-inline T double_factorial(unsigned i)
+BOOST_MATH_GPU_ENABLED inline T double_factorial(unsigned i)
{
   return double_factorial<T>(i, policies::policy<>());
}

+// TODO(mborland): We do not currently have support for tgamma_delta_ratio
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+
namespace detail{

template <class T, class Policy>
T rising_factorial_imp(T x, int n, const Policy& pol)
{
-   static_assert(!std::is_integral<T>::value, "Type T must not be an integral type");
+   static_assert(!boost::math::is_integral<T>::value, "Type T must not be an integral type");
   if(x < 0)
   {
      //
@@ -165,7 +170,7 @@ T rising_factorial_imp(T x, int n, const Policy& pol)
template <class T, class Policy>
inline T falling_factorial_imp(T x, unsigned n, const Policy& pol)
{
-   static_assert(!std::is_integral<T>::value, "Type T must not be an integral type");
+   static_assert(!boost::math::is_integral<T>::value, "Type T must not be an integral type");
   BOOST_MATH_STD_USING // ADL of std names
   if(x == 0)
      return 0;
@@ -262,6 +267,8 @@ inline typename tools::promote_args::type
      static_cast<result_type>(x), n, pol);
}

+#endif // BOOST_MATH_HAS_GPU_SUPPORT
+
} // namespace math
} // namespace boost
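The static_assert added above rejects integral T for a practical reason: factorials overflow fixed-width integers almost immediately, so only floating-point result types are useful. A short illustration (the numeric facts about 32-bit int are exact; the snippet is not part of the diff):

    // 13! = 6'227'020'800 already exceeds INT32_MAX (2'147'483'647), so an
    // integral factorial<T> would be uselessly narrow; a double holds it exactly.
    #include <boost/math/special_functions/factorials.hpp>

    double f13 = boost::math::factorial<double>(13); // 6227020800.0
    // boost::math::factorial<int>(13);              // rejected by the static_assert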
diff --git a/include/boost/math/special_functions/fpclassify.hpp b/include/boost/math/special_functions/fpclassify.hpp
index 2c504d7ac8..0ac9470f28 100644
--- a/include/boost/math/special_functions/fpclassify.hpp
+++ b/include/boost/math/special_functions/fpclassify.hpp
@@ -1,5 +1,6 @@
//  Copyright John Maddock 2005-2008.
//  Copyright (c) 2006-2008 Johan Rade
+//  Copyright (c) 2024 Matt Borland
//  Use, modification and distribution are subject to the
//  Boost Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -11,12 +12,17 @@
#pragma once
#endif

-#include <...>
-#include <...>
-#include <...>
+#include <...>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
#include <...>
#include <...>
#include <...>
+#include <...>
+#include <...>
+#include <...>
+
/*!
  \file fpclassify.hpp
  \brief Classify floating-point value as normal, subnormal, zero, infinite, or NaN.
@@ -76,6 +82,80 @@ is used.
*/

+#ifdef BOOST_MATH_HAS_GPU_SUPPORT
+
+namespace boost { namespace math {
+
+template<> inline BOOST_MATH_GPU_ENABLED bool (isnan)(float x) { return x != x; }
+template<> inline BOOST_MATH_GPU_ENABLED bool (isnan)(double x) { return x != x; }
+
+template<> inline BOOST_MATH_GPU_ENABLED bool (isinf)(float x) { return x > FLT_MAX || x < -FLT_MAX; }
+template<> inline BOOST_MATH_GPU_ENABLED bool (isinf)(double x) { return x > DBL_MAX || x < -DBL_MAX; }
+
+template<> inline BOOST_MATH_GPU_ENABLED bool (isfinite)(float x) { return !isnan(x) && !isinf(x); }
+template<> inline BOOST_MATH_GPU_ENABLED bool (isfinite)(double x) { return !isnan(x) && !isinf(x); }
+
+template<> inline BOOST_MATH_GPU_ENABLED bool (isnormal)(float x)
+{
+   if(x < 0) x = -x;
+   return (x >= FLT_MIN) && (x <= FLT_MAX);
+}
+template<> inline BOOST_MATH_GPU_ENABLED bool (isnormal)(double x)
+{
+   if(x < 0) x = -x;
+   return (x >= DBL_MIN) && (x <= DBL_MAX);
+}
+
+template<> inline BOOST_MATH_GPU_ENABLED int (fpclassify)(float t)
+{
+   if((boost::math::isnan)(t))
+      return FP_NAN;
+   // std::fabs broken on a few systems especially for long long!!!!
+   float at = (t < 0.0f) ? -t : t;
+
+   // Use a process of exclusion to figure out
+   // what kind of type we have, this relies on
+   // IEEE conforming reals that will treat
+   // Nan's as unordered. Some compilers
+   // don't do this once optimisations are
+   // turned on, hence the check for nan's above.
+   if(at <= FLT_MAX)
+   {
+      if(at >= FLT_MIN)
+         return FP_NORMAL;
+      return (at != 0) ? FP_SUBNORMAL : FP_ZERO;
+   }
+   else if(at > FLT_MAX)
+      return FP_INFINITE;
+   return FP_NAN;
+}
+
+template<> inline BOOST_MATH_GPU_ENABLED int (fpclassify)(double t)
+{
+   if((boost::math::isnan)(t))
+      return FP_NAN;
+   // std::fabs broken on a few systems especially for long long!!!!
+   double at = (t < 0.0) ? -t : t;
+
+   // Use a process of exclusion to figure out
+   // what kind of type we have, this relies on
+   // IEEE conforming reals that will treat
+   // Nan's as unordered. Some compilers
+   // don't do this once optimisations are
+   // turned on, hence the check for nan's above.
+   if(at <= DBL_MAX)
+   {
+      if(at >= DBL_MIN)
+         return FP_NORMAL;
+      return (at != 0) ? FP_SUBNORMAL : FP_ZERO;
+   }
+   else if(at > DBL_MAX)
+      return FP_INFINITE;
+   return FP_NAN;
+}
+
+#else
+
#if defined(_MSC_VER) || defined(BOOST_BORLANDC)
#include <...>
#endif
@@ -632,7 +712,86 @@ inline bool (isnan)(__float128 x)
}
#endif

+#endif
+
} // namespace math
} // namespace boost

+#else // Special handling generally using the CUDA library
+
+#include <...>
+
+namespace boost {
+namespace math {
+
+template <typename T, boost::math::enable_if_t<boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isnan(T x)
+{
+   return false;
+}
+
+template <typename T, boost::math::enable_if_t<!boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isnan(T x)
+{
+   return ::isnan(x);
+}
+
+template <typename T, boost::math::enable_if_t<boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isinf(T x)
+{
+   return false;
+}
+
+template <typename T, boost::math::enable_if_t<!boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isinf(T x)
+{
+   return ::isinf(x);
+}
+
+template <typename T, boost::math::enable_if_t<boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isfinite(T x)
+{
+   return true;
+}
+
+template <typename T, boost::math::enable_if_t<!boost::math::is_integral_v<T>, bool> = true>
+inline BOOST_MATH_GPU_ENABLED bool isfinite(T x)
+{
+   return ::isfinite(x);
+}
+
+template <typename T>
+inline BOOST_MATH_GPU_ENABLED bool isnormal(T x)
+{
+   return x != static_cast<T>(0) && x != static_cast<T>(-0) &&
+          !boost::math::isnan(x) &&
+          !boost::math::isinf(x);
+}
+
+// We skip the check for FP_SUBNORMAL since they are not supported on these platforms
+template <typename T>
+inline BOOST_MATH_GPU_ENABLED int fpclassify(T x)
+{
+   if (boost::math::isnan(x))
+   {
+      return BOOST_MATH_FP_NAN;
+   }
+   else if (boost::math::isinf(x))
+   {
+      return BOOST_MATH_FP_INFINITE;
+   }
+   else if (x == static_cast<T>(0) || x == static_cast<T>(-0))
+   {
+      return BOOST_MATH_FP_ZERO;
+   }
+
+   return BOOST_MATH_FP_NORMAL;
+}
+
+} // Namespace math
+} // Namespace boost
+
+#endif // BOOST_MATH_HAS_NVRTC
+
#endif // BOOST_MATH_FPCLASSIFY_HPP
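The GPU fpclassify above works by exclusion because device code cannot rely on std::fpclassify: NaN is weeded out first via the unordered self-comparison, then the magnitude is bracketed against the type's limits. The same logic, restated as a standalone sketch for double (not part of the diff):

    // Standalone restatement of the process-of-exclusion classifier above.
    #include <cfloat>
    #include <cmath>   // FP_* classification macros

    inline int classify_double(double t)
    {
       if (t != t)        return FP_NAN;       // NaN compares unordered to itself
       double at = (t < 0.0) ? -t : t;
       if (at > DBL_MAX)  return FP_INFINITE;  // beyond the finite range
       if (at >= DBL_MIN) return FP_NORMAL;    // within the normal range
       return (at != 0) ? FP_SUBNORMAL : FP_ZERO;
    }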
diff --git a/include/boost/math/special_functions/gamma.hpp b/include/boost/math/special_functions/gamma.hpp
index a58ea3e693..4a15782c01 100644
--- a/include/boost/math/special_functions/gamma.hpp
+++ b/include/boost/math/special_functions/gamma.hpp
@@ -2,7 +2,7 @@
//  Copyright Paul A. Bristow 2007, 2013-14.
//  Copyright Nikhar Agrawal 2013-14
//  Copyright Christopher Kormanyos 2013-14, 2020, 2024
-
+// Copyright Matt Borland 2024.
//  Use, modification and distribution are subject to the
//  Boost Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -14,12 +14,15 @@
#pragma once
#endif

+#include <...>
#include <...>
#include <...>
#include <...>
#include <...>
+#include <...>
+#include <...>
+#include <...>
#include <...>
-#include <...>
#include <...>
#include <...>
#include <...>
@@ -32,12 +35,12 @@
#include <...>
#include <...>
#include <...>
+
+// Only needed for types larger than double
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
#include <...>
#include <...>
-
-#include <...>
-#include <...>
-#include <...>
+#endif

#ifdef _MSC_VER
# pragma warning(push)
@@ -56,13 +59,13 @@
namespace boost{ namespace math{

namespace detail{

template <class T>
-inline bool is_odd(T v, const std::true_type&)
+BOOST_MATH_GPU_ENABLED inline bool is_odd(T v, const boost::math::true_type&)
{
   int i = static_cast<int>(v);
   return i&1;
}
template <class T>
-inline bool is_odd(T v, const std::false_type&)
+BOOST_MATH_GPU_ENABLED inline bool is_odd(T v, const boost::math::false_type&)
{
   // Oh dear can't cast T to int!
   BOOST_MATH_STD_USING
@@ -70,13 +73,13 @@ inline bool is_odd(T v, const std::false_type&)
   return static_cast<bool>(modulus != 0);
}
template <class T>
-inline bool is_odd(T v)
+BOOST_MATH_GPU_ENABLED inline bool is_odd(T v)
{
-   return is_odd(v, ::std::is_convertible<T, int>());
+   return is_odd(v, ::boost::math::is_convertible<T, int>());
}

template <class T>
-T sinpx(T z)
+BOOST_MATH_GPU_ENABLED T sinpx(T z)
{
// Ad hoc function calculates x * sin(pi * x),
// taking extra care near when x is near a whole number.
@@ -108,7 +111,7 @@ T sinpx(T z)
// tgamma(z), with Lanczos support:
//
template <class T, class Policy, class Lanczos>
-T gamma_imp(T z, const Policy& pol, const Lanczos& l)
+BOOST_MATH_GPU_ENABLED T gamma_imp_final(T z, const Policy& pol, const Lanczos&)
{
   BOOST_MATH_STD_USING
@@ -122,25 +125,13 @@ T gamma_imp(T z, const Policy& pol, const Lanczos& l)
      b = true;
   }
#endif
-   static const char* function = "boost::math::tgamma<%1%>(%1%)";
+   constexpr auto function = "boost::math::tgamma<%1%>(%1%)";
   if(z <= 0)
   {
      if(floor(z) == z)
-         return policies::raise_pole_error<T>(function, "Evaluation of tgamma at a negative integer %1%.", z, pol);
-      if(z <= -20)
      {
-         result = gamma_imp(T(-z), pol, l) * sinpx(z);
-         BOOST_MATH_INSTRUMENT_VARIABLE(result);
-         if((fabs(result) < 1) && (tools::max_value<T>() * fabs(result) < boost::math::constants::pi<T>()))
-            return -boost::math::sign(result) * policies::raise_overflow_error<T>(function, "Result of tgamma is too large to represent.", pol);
-         result = -boost::math::constants::pi<T>() / result;
-         if(result == 0)
-            return policies::raise_underflow_error<T>(function, "Result of tgamma is too small to represent.", pol);
-         if((boost::math::fpclassify)(result) == (int)FP_SUBNORMAL)
-            return policies::raise_denorm_error<T>(function, "Result of tgamma is denormalized.", result, pol);
-         BOOST_MATH_INSTRUMENT_VARIABLE(result);
-         return result;
+         return policies::raise_pole_error<T>(function, "Evaluation of tgamma at a negative integer %1%.", z, pol);
      }

   // shift z to > 1:
@@ -195,11 +186,52 @@ T gamma_imp(T z, const Policy& pol, const Lanczos& l)
   }
   return result;
}
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+# pragma nv_diag_suppress 2190
+#endif
+
+// SYCL compilers can not support recursion so we extract it into a dispatch function
+template <class T, class Policy, class Lanczos>
+BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T gamma_imp(T z, const Policy& pol, const Lanczos& l)
+{
+   BOOST_MATH_STD_USING
+
+   T result = 1;
+   constexpr auto function = "boost::math::tgamma<%1%>(%1%)";
+
+   if(z <= 0)
+   {
+      if(floor(z) == z)
+         return policies::raise_pole_error<T>(function, "Evaluation of tgamma at a negative integer %1%.", z, pol);
+      if(z <= -20)
+      {
+         result = gamma_imp_final(T(-z), pol, l) * sinpx(z);
+         BOOST_MATH_INSTRUMENT_VARIABLE(result);
+         if((fabs(result) < 1) && (tools::max_value<T>() * fabs(result) < boost::math::constants::pi<T>()))
+            return -boost::math::sign(result) * policies::raise_overflow_error<T>(function, "Result of tgamma is too large to represent.", pol);
+         result = -boost::math::constants::pi<T>() / result;
+         if(result == 0)
+            return policies::raise_underflow_error<T>(function, "Result of tgamma is too small to represent.", pol);
+         if((boost::math::fpclassify)(result) == BOOST_MATH_FP_SUBNORMAL)
+            return policies::raise_denorm_error<T>(function, "Result of tgamma is denormalized.", result, pol);
+         BOOST_MATH_INSTRUMENT_VARIABLE(result);
+         return result;
+      }
+   }
+
+   return gamma_imp_final(T(z), pol, l);
+}
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+# pragma nv_diag_default 2190
+#endif
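The gamma_imp split above is the recurring pattern of this patch: SYCL targets cannot compile recursion, so the rare reflection branch that used to re-enter gamma_imp is hoisted into a thin dispatcher that only ever calls a non-recursive *_final worker. A toy reduction of the idea (worker/worker_final are hypothetical names, not from the diff):

    // Toy sketch of the recursion-removal dispatch used for gamma_imp above.
    template <typename T>
    T worker_final(T z)
    {
       // main evaluation; never calls itself or worker()
       return z * z;
    }

    template <typename T>
    T worker(T z)
    {
       if (z < 0)                   // the rare branch that used to recurse
          return -worker_final(-z); // now a plain call into the worker
       return worker_final(z);
    }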
//
// lgamma(z) with Lanczos support:
//
template <class T, class Policy, class Lanczos>
-T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = nullptr)
+BOOST_MATH_GPU_ENABLED T lgamma_imp_final(T z, const Policy& pol, const Lanczos& l, int* sign = nullptr)
{
#ifdef BOOST_MATH_INSTRUMENT
   static bool b = false;
@@ -212,29 +244,12 @@ T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = nullptr)

   BOOST_MATH_STD_USING

-   static const char* function = "boost::math::lgamma<%1%>(%1%)";
+   constexpr auto function = "boost::math::lgamma<%1%>(%1%)";

   T result = 0;
   int sresult = 1;
-   if(z <= -tools::root_epsilon<T>())
-   {
-      // reflection formula:
-      if(floor(z) == z)
-         return policies::raise_pole_error<T>(function, "Evaluation of lgamma at a negative integer %1%.", z, pol);
-
-      T t = sinpx(z);
-      z = -z;
-      if(t < 0)
-      {
-         t = -t;
-      }
-      else
-      {
-         sresult = -sresult;
-      }
-      result = log(boost::math::constants::pi<T>()) - lgamma_imp(z, pol, l) - log(t);
-   }
-   else if (z < tools::root_epsilon<T>())
+
+   if (z < tools::root_epsilon<T>())
   {
      if (0 == z)
         return policies::raise_pole_error<T>(function, "Evaluation of lgamma at %1%.", z, pol);
@@ -248,7 +263,7 @@ T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = nullptr)
   else if(z < 15)
   {
      typedef typename policies::precision<T, Policy>::type precision_type;
-      typedef std::integral_constant<int, ...> tag_type;
+      typedef boost::math::integral_constant<int, ...> tag_type;
      result = lgamma_small_imp<T>(z, T(z - 1), T(z - 2), tag_type(), pol, l);
   }
-   else if((z >= 3) && (z < 100) && (std::numeric_limits<T>::max_exponent >= 1024))
+   else if((z >= 3) && (z < 100) && (boost::math::numeric_limits<T>::max_exponent >= 1024))
   {
      // taking the log of tgamma reduces the error, no danger of overflow here:
      result = log(gamma_imp(z, pol, l));
@@ -279,6 +294,55 @@ T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = nullptr)
   return result;
}

+#ifdef BOOST_MATH_ENABLE_CUDA
+# pragma nv_diag_suppress 2190
+#endif
+
+template <class T, class Policy, class Lanczos>
+BOOST_MATH_GPU_ENABLED BOOST_MATH_FORCEINLINE T lgamma_imp(T z, const Policy& pol, const Lanczos& l, int* sign = nullptr)
+{
+   BOOST_MATH_STD_USING
+
+   if(z <= -tools::root_epsilon<T>())
+   {
+      constexpr auto function = "boost::math::lgamma<%1%>(%1%)";
+
+      T result = 0;
+      int sresult = 1;
+
+      // reflection formula:
+      if(floor(z) == z)
+         return policies::raise_pole_error<T>(function, "Evaluation of lgamma at a negative integer %1%.", z, pol);
+
+      T t = sinpx(z);
+      z = -z;
+      if(t < 0)
+      {
+         t = -t;
+      }
+      else
+      {
+         sresult = -sresult;
+      }
+      result = log(boost::math::constants::pi<T>()) - lgamma_imp_final(T(z), pol, l) - log(t);
+
+      if(sign)
+      {
+         *sign = sresult;
+      }
+
+      return result;
+   }
+   else
+   {
+      return lgamma_imp_final(T(z), pol, l, sign);
+   }
+}
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+# pragma nv_diag_default 2190
+#endif
+
//
// Incomplete gamma functions follow:
//
@@ -289,14 +353,14 @@ struct upper_incomplete_gamma_fract
   T z, a;
   int k;
public:
-   typedef std::pair<T, T> result_type;
+   typedef boost::math::pair<T, T> result_type;

-   upper_incomplete_gamma_fract(T a1, T z1)
+   BOOST_MATH_GPU_ENABLED upper_incomplete_gamma_fract(T a1, T z1)
      : z(z1-a1+1), a(a1), k(0)
   {
   }

-   result_type operator()()
+   BOOST_MATH_GPU_ENABLED result_type operator()()
   {
      ++k;
      z += 2;
@@ -305,7 +369,7 @@ struct upper_incomplete_gamma_fract
};

template <class T>
-inline T upper_gamma_fraction(T a, T z, T eps)
+BOOST_MATH_GPU_ENABLED inline T upper_gamma_fraction(T a, T z, T eps)
{
   // Multiply result by z^a * e^-z to get the full
   // upper incomplete integral. Divide by tgamma(z)
@@ -321,9 +385,9 @@ struct lower_incomplete_gamma_series
   T a, z, result;
public:
   typedef T result_type;
-   lower_incomplete_gamma_series(T a1, T z1) : a(a1), z(z1), result(1){}
+   BOOST_MATH_GPU_ENABLED lower_incomplete_gamma_series(T a1, T z1) : a(a1), z(z1), result(1){}

-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
   {
      T r = result;
      a += 1;
@@ -333,32 +397,34 @@ struct lower_incomplete_gamma_series
};

template <class T, class Policy>
-inline T lower_gamma_series(T a, T z, const Policy& pol, T init_value = 0)
+BOOST_MATH_GPU_ENABLED inline T lower_gamma_series(T a, T z, const Policy& pol, T init_value = 0)
{
   // Multiply result by ((z^a) * (e^-z) / a) to get the full
   // lower incomplete integral. Then divide by tgamma(a)
   // to get the normalised value.
   lower_incomplete_gamma_series<T> s(a, z);
-   std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+   boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
   T factor = policies::get_epsilon<T, Policy>();
   T result = boost::math::tools::sum_series(s, factor, max_iter, init_value);
   policies::check_series_iterations<T>("boost::math::detail::lower_gamma_series<%1%>(%1%)", max_iter, pol);
   return result;
}

+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+
//
// Fully generic tgamma and lgamma use Stirling's approximation
// with Bernoulli numbers.
//
template <class T>
-std::size_t highest_bernoulli_index()
+boost::math::size_t highest_bernoulli_index()
{
-   const float digits10_of_type = (std::numeric_limits<T>::is_specialized
-                                     ? static_cast<float>(std::numeric_limits<T>::digits10)
-                                     : static_cast<float>(boost::math::tools::digits<T>() * 0.301F));
+   const float digits10_of_type = (boost::math::numeric_limits<T>::is_specialized
+                                     ? static_cast<float>(boost::math::numeric_limits<T>::digits10)
+                                     : static_cast<float>(boost::math::tools::digits<T>() * 0.301F));

   // Find the high index n for Bn to produce the desired precision in Stirling's calculation.
-   return static_cast<std::size_t>(18.0F + (0.6F * digits10_of_type));
+   return static_cast<boost::math::size_t>(18.0F + (0.6F * digits10_of_type));
}

template <class T>
@@ -366,8 +432,8 @@ int minimum_argument_for_bernoulli_recursion()
{
   BOOST_MATH_STD_USING

-   const float digits10_of_type = (std::numeric_limits<T>::is_specialized
-                                     ? (float) std::numeric_limits<T>::digits10
-                                     : (float) (boost::math::tools::digits<T>() * 0.301F));
+   const float digits10_of_type = (boost::math::numeric_limits<T>::is_specialized
+                                     ? (float) boost::math::numeric_limits<T>::digits10
+                                     : (float) (boost::math::tools::digits<T>() * 0.301F));

   int min_arg = (int) (digits10_of_type * 1.7F);
@@ -389,7 +455,7 @@ int minimum_argument_for_bernoulli_recursion()
      const float d2_minus_one = ((digits10_of_type / 0.301F) - 1.0F);
      const float limit = ceil(exp((d2_minus_one * log(2.0F)) / 20.0F));

-      min_arg = (int) ((std::min)(digits10_of_type * 1.7F, limit));
+      min_arg = (int) (BOOST_MATH_GPU_SAFE_MIN(digits10_of_type * 1.7F, limit));
   }

   return min_arg;
@@ -408,7 +474,7 @@ T scaled_tgamma_no_lanczos(const T& z, const Policy& pol, bool islog = false)

   // Perform the Bernoulli series expansion of Stirling's approximation.
-   const std::size_t number_of_bernoullis_b2n = policies::get_max_series_iterations<Policy>();
+   const boost::math::size_t number_of_bernoullis_b2n = policies::get_max_series_iterations<Policy>();

   T one_over_x_pow_two_n_minus_one = 1 / z;
   const T one_over_x2 = one_over_x_pow_two_n_minus_one * one_over_x_pow_two_n_minus_one;
@@ -417,11 +483,11 @@ T scaled_tgamma_no_lanczos(const T& z, const Policy& pol, bool islog = false)
   const T half_ln_two_pi_over_z = sqrt(boost::math::constants::two_pi<T>() / z);
   T last_term = 2 * sum;

-   for (std::size_t n = 2U;; ++n)
+   for (boost::math::size_t n = 2U;; ++n)
   {
      one_over_x_pow_two_n_minus_one *= one_over_x2;

-      const std::size_t n2 = static_cast<std::size_t>(n * 2U);
+      const boost::math::size_t n2 = static_cast<boost::math::size_t>(n * 2U);

      const T term = (boost::math::bernoulli_b2n<T>(static_cast<int>(n)) * one_over_x_pow_two_n_minus_one) / (n2 * (n2 - 1U));
@@ -460,7 +526,7 @@ T gamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&)
{
   BOOST_MATH_STD_USING

-   static const char* function = "boost::math::tgamma<%1%>(%1%)";
+   constexpr auto function = "boost::math::tgamma<%1%>(%1%)";

   // Check if the argument of tgamma is identically zero.
   const bool is_at_zero = (z == 0);
@@ -569,7 +635,7 @@ T gamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&)
      if(gamma_value == 0)
         return policies::raise_underflow_error<T>(function, "Result of tgamma is too small to represent.", pol);

-      if((boost::math::fpclassify)(gamma_value) == static_cast<int>(FP_SUBNORMAL))
+      if((boost::math::fpclassify)(gamma_value) == static_cast<int>(BOOST_MATH_FP_SUBNORMAL))
         return policies::raise_denorm_error<T>(function, "Result of tgamma is denormalized.", gamma_value, pol);
   }
@@ -610,7 +676,7 @@ T lgamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&, int* sig
{
   BOOST_MATH_STD_USING

-   static const char* function = "boost::math::lgamma<%1%>(%1%)";
+   constexpr auto function = "boost::math::lgamma<%1%>(%1%)";

   // Check if the argument of lgamma is identically zero.
   const bool is_at_zero = (z == 0);
@@ -715,18 +781,33 @@ T lgamma_imp(T z, const Policy& pol, const lanczos::undefined_lanczos&, int* sig
   return log_gamma_value;
}

+#endif // BOOST_MATH_HAS_GPU_SUPPORT
+
+// In order for tgammap1m1_imp to compile we need a forward decl of boost::math::tgamma
+// The rub is that we can't just use math_fwd so we provide one here only in that circumstance
+#ifdef BOOST_MATH_HAS_NVRTC
+template <class RT>
+BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT> tgamma(RT z);
+
+template <class RT1, class RT2>
+BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2> tgamma(RT1 a, RT2 z);
+
+template <class RT1, class RT2, class Policy>
+BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2> tgamma(RT1 a, RT2 z, const Policy& pol);
+#endif
+
//
// This helper calculates tgamma(dz+1)-1 without cancellation errors,
// used by the upper incomplete gamma with z < 1:
//
template <class T, class Policy, class Lanczos>
-T tgammap1m1_imp(T dz, Policy const& pol, const Lanczos& l)
+BOOST_MATH_GPU_ENABLED T tgammap1m1_imp(T dz, Policy const& pol, const Lanczos& l)
{
   BOOST_MATH_STD_USING

   typedef typename policies::precision<T, Policy>::type precision_type;
-   typedef std::integral_constant<int, ...> tag_type;
+   typedef boost::math::integral_constant<int, ...> tag_type;
...
template <class T, class Policy>
inline T tgammap1m1_imp(T z, Policy const& pol,
   const ::boost::math::lanczos::undefined_lanczos&)
@@ -781,6 +872,8 @@ inline T tgammap1m1_imp(T z, Policy const& pol,
   return boost::math::expm1(boost::math::lgamma(1 + z, pol));
}

+#endif // BOOST_MATH_HAS_GPU_SUPPORT
+
//
// Series representation for upper fraction when z is small:
//
@@ -789,9 +882,9 @@ struct small_gamma2_series
{
   typedef T result_type;

-   small_gamma2_series(T a_, T x_) : result(-x_), x(-x_), apn(a_+1), n(1){}
+   BOOST_MATH_GPU_ENABLED small_gamma2_series(T a_, T x_) : result(-x_), x(-x_), apn(a_+1), n(1){}

-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
   {
      T r = result / (apn);
      result *= x;
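tgammap1m1_imp above exists to dodge catastrophic cancellation: for small dz, tgamma(1 + dz) is within rounding of 1, so subtracting 1 directly destroys most significant digits, while the expm1(lgamma(1 + z)) form used by the fallback keeps full relative precision because both expm1 and lgamma are accurate near zero. A host-side illustration via the public entry point (not part of the diff):

    // tgamma1pm1(dz) computes tgamma(1 + dz) - 1 without cancellation.
    #include <boost/math/special_functions/gamma.hpp>

    double naive = boost::math::tgamma(1.0 + 1e-12) - 1.0; // few correct digits
    double good  = boost::math::tgamma1pm1(1e-12);         // ~ -gamma_E * 1e-12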
@@ -809,7 +902,7 @@ struct small_gamma2_series
// incomplete gammas:
//
template <class T, class Policy>
-T full_igamma_prefix(T a, T z, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T full_igamma_prefix(T a, T z, const Policy& pol)
{
   BOOST_MATH_STD_USING
@@ -854,7 +947,7 @@ T full_igamma_prefix(T a, T z, const Policy& pol)
   // This error handling isn't very good: it happens after the fact
   // rather than before it...
   //
-   if((boost::math::fpclassify)(prefix) == (int)FP_INFINITE)
+   if((boost::math::fpclassify)(prefix) == (int)BOOST_MATH_FP_INFINITE)
      return policies::raise_overflow_error<T>("boost::math::detail::full_igamma_prefix<%1%>(%1%, %1%)", "Result of incomplete gamma function is too large to represent.", pol);

   return prefix;
@@ -864,7 +957,7 @@ T full_igamma_prefix(T a, T z, const Policy& pol)
// most if the error occurs in this function:
//
template <class T, class Policy, class Lanczos>
-T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l)
+BOOST_MATH_GPU_ENABLED T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l)
{
   BOOST_MATH_STD_USING
   if (z >= tools::max_value<T>())
@@ -911,16 +1004,16 @@ T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l)
   //
   T alz = a * log(z / agh);
   T amz = a - z;
-   if(((std::min)(alz, amz) <= tools::log_min_value<T>()) || ((std::max)(alz, amz) >= tools::log_max_value<T>()))
+   if((BOOST_MATH_GPU_SAFE_MIN(alz, amz) <= tools::log_min_value<T>()) || (BOOST_MATH_GPU_SAFE_MAX(alz, amz) >= tools::log_max_value<T>()))
   {
      T amza = amz / a;
-      if(((std::min)(alz, amz)/2 > tools::log_min_value<T>()) && ((std::max)(alz, amz)/2 < tools::log_max_value<T>()))
+      if((BOOST_MATH_GPU_SAFE_MIN(alz, amz)/2 > tools::log_min_value<T>()) && (BOOST_MATH_GPU_SAFE_MAX(alz, amz)/2 < tools::log_max_value<T>()))
      {
         // compute square root of the result and then square it:
         T sq = pow(z / agh, a / 2) * exp(amz / 2);
         prefix = sq * sq;
      }
-      else if(((std::min)(alz, amz)/4 > tools::log_min_value<T>()) && ((std::max)(alz, amz)/4 < tools::log_max_value<T>()) && (z > a))
+      else if((BOOST_MATH_GPU_SAFE_MIN(alz, amz)/4 > tools::log_min_value<T>()) && (BOOST_MATH_GPU_SAFE_MAX(alz, amz)/4 < tools::log_max_value<T>()) && (z > a))
      {
         // compute the 4th root of the result then square it twice:
         T sq = pow(z / agh, a / 4) * exp(amz / 4);
@@ -944,6 +1037,9 @@ T regularised_gamma_prefix(T a, T z, const Policy& pol, const Lanczos& l)
   prefix *= sqrt(agh / boost::math::constants::e<T>()) / Lanczos::lanczos_sum_expG_scaled(a);
   return prefix;
}
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+
//
// And again, without Lanczos support:
//
@@ -1013,18 +1109,28 @@ T regularised_gamma_prefix(T a, T z, const Policy& pol, const lanczos::undefined
      }
   }
}
+
+#endif // BOOST_MATH_HAS_GPU_SUPPORT
+
//
// Upper gamma fraction for very small a:
//
template <class T, class Policy>
-inline T tgamma_small_upper_part(T a, T x, const Policy& pol, T* pgam = 0, bool invert = false, T* pderivative = 0)
+BOOST_MATH_GPU_ENABLED inline T tgamma_small_upper_part(T a, T x, const Policy& pol, T* pgam = 0, bool invert = false, T* pderivative = 0)
{
   BOOST_MATH_STD_USING // ADL of std functions.
   //
   // Compute the full upper fraction (Q) when a is very small:
   //
+   #ifdef BOOST_MATH_HAS_NVRTC
+   typedef typename tools::promote_args<T>::type result_type;
+   typedef typename policies::evaluation<result_type, Policy>::type value_type;
+   typedef typename lanczos::lanczos<value_type, Policy>::type evaluation_type;
+   T result {detail::tgammap1m1_imp(static_cast<value_type>(a), pol, evaluation_type())};
+   #else
   T result { boost::math::tgamma1pm1(a, pol) };
+   #endif

   if(pgam)
      *pgam = (result + 1) / a;
@@ -1032,7 +1138,7 @@ inline T tgamma_small_upper_part(T a, T x, const Policy& pol, T* pgam = 0, bool
   result -= p;
   result /= a;
   detail::small_gamma2_series<T> s(a, x);
-   std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>() - 10;
+   boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>() - 10;
   p += 1;
   if(pderivative)
      *pderivative = p / (*pgam * exp(x));
@@ -1047,7 +1153,7 @@ inline T tgamma_small_upper_part(T a, T x, const Policy& pol, T* pgam = 0, bool
// Upper gamma fraction for integer a:
//
template <class T, class Policy>
-inline T finite_gamma_q(T a, T x, Policy const& pol, T* pderivative = 0)
+BOOST_MATH_GPU_ENABLED inline T finite_gamma_q(T a, T x, Policy const& pol, T* pderivative = 0)
{
   //
   // Calculates normalised Q when a is an integer:
@@ -1075,13 +1181,27 @@ inline T finite_gamma_q(T a, T x, Policy const& pol, T* pderivative = 0)
// Upper gamma fraction for half integer a:
//
template <class T, class Policy>
-T finite_half_gamma_q(T a, T x, T* p_derivative, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T finite_half_gamma_q(T a, T x, T* p_derivative, const Policy& pol)
{
   //
   // Calculates normalised Q when a is a half-integer:
   //
   BOOST_MATH_STD_USING
+
+   #ifdef BOOST_MATH_HAS_NVRTC
+   T e;
+   if (boost::math::is_same_v<T, float>)
+   {
+      e = ::erfcf(::sqrtf(x));
+   }
+   else
+   {
+      e = ::erfc(::sqrt(x));
+   }
+   #else
   T e = boost::math::erfc(sqrt(x), pol);
+   #endif
+
   if((e != 0) && (a > 1))
   {
      T term = exp(-x) / sqrt(constants::pi<T>() * x);
@@ -1115,9 +1235,9 @@ template <class T>
struct incomplete_tgamma_large_x_series
{
   typedef T result_type;
-   incomplete_tgamma_large_x_series(const T& a, const T& x)
+   BOOST_MATH_GPU_ENABLED incomplete_tgamma_large_x_series(const T& a, const T& x)
      : a_poch(a - 1), z(x), term(1) {}
-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
   {
      T result = term;
      term *= a_poch / z;
@@ -1128,11 +1248,11 @@ struct incomplete_tgamma_large_x_series
};

template <class T, class Policy>
-T incomplete_tgamma_large_x(const T& a, const T& x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T incomplete_tgamma_large_x(const T& a, const T& x, const Policy& pol)
{
   BOOST_MATH_STD_USING
   incomplete_tgamma_large_x_series<T> s(a, x);
-   std::uintmax_t max_iter = boost::math::policies::get_max_series_iterations<Policy>();
+   boost::math::uintmax_t max_iter = boost::math::policies::get_max_series_iterations<Policy>();
   T result = boost::math::tools::sum_series(s, boost::math::policies::get_epsilon<T, Policy>(), max_iter);
   boost::math::policies::check_series_iterations<T>("boost::math::tgamma<%1%>(%1%,%1%)", max_iter, pol);
   return result;
}
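incomplete_tgamma_large_x above sums the large-x asymptotic expansion of the upper incomplete gamma, tgamma(a, x) ~ x^(a-1) e^(-x) [1 + (a-1)/x + (a-1)(a-2)/x^2 + ...]; each operator() call yields the next bracketed term, with `term` carrying the running product and `a_poch` walking the falling factorial. A bare-bones restatement of the recurrence (illustrative helper, not part of the diff):

    #include <cmath>

    // Fixed-term restatement of the series summed by incomplete_tgamma_large_x.
    double upper_gamma_large_x(double a, double x, int terms)
    {
       double sum = 0, term = 1, a_poch = a - 1;
       for (int n = 0; n < terms; ++n)
       {
          sum    += term;
          term   *= a_poch / x;  // next factor: (a - 1 - n) / x
          a_poch -= 1;
       }
       return std::pow(x, a - 1) * std::exp(-x) * sum; // ~ tgamma(a, x), x large
    }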
@@ -1143,10 +1263,10 @@ T incomplete_tgamma_large_x(const T& a, const T& x, const Policy& pol)
//
// Main incomplete gamma entry point, handles all four incomplete gamma's:
//
template <class T, class Policy>
-T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
+BOOST_MATH_GPU_ENABLED T gamma_incomplete_imp_final(T a, T x, bool normalised, bool invert,
                       const Policy& pol, T* p_derivative)
{
-   static const char* function = "boost::math::gamma_p<%1%>(%1%, %1%)";
+   constexpr auto function = "boost::math::gamma_p<%1%>(%1%, %1%)";
   if(a <= 0)
      return policies::raise_domain_error<T>(function, "Argument a to the incomplete gamma function must be greater than zero (got a=%1%).", a, pol);
   if(x < 0)
      return policies::raise_domain_error<T>(function, "Argument x to the incomplete gamma function must be >= 0 (got x=%1%).", x, pol);

   BOOST_MATH_STD_USING

   T result = 0; // Just to avoid warning C4701: potentially uninitialized local variable 'result' used

-   if(a >= max_factorial<T>::value && !normalised)
-   {
-      //
-      // When we're computing the non-normalized incomplete gamma
-      // and a is large the result is rather hard to compute unless
-      // we use logs. There are really two options - if x is a long
-      // way from a in value then we can reliably use methods 2 and 4
-      // below in logarithmic form and go straight to the result.
-      // Otherwise we let the regularized gamma take the strain
-      // (the result is unlikely to underflow in the central region anyway)
-      // and combine with lgamma in the hopes that we get a finite result.
-      //
-      if(invert && (a * 4 < x))
-      {
-         // This is method 4 below, done in logs:
-         result = a * log(x) - x;
-         if(p_derivative)
-            *p_derivative = exp(result);
-         result += log(upper_gamma_fraction(a, x, policies::get_epsilon<T, Policy>()));
-      }
-      else if(!invert && (a > 4 * x))
-      {
-         // This is method 2 below, done in logs:
-         result = a * log(x) - x;
-         if(p_derivative)
-            *p_derivative = exp(result);
-         T init_value = 0;
-         result += log(detail::lower_gamma_series(a, x, pol, init_value) / a);
-      }
-      else
-      {
-         result = gamma_incomplete_imp(a, x, true, invert, pol, p_derivative);
-         if(result == 0)
-         {
-            if(invert)
-            {
-               // Try http://functions.wolfram.com/06.06.06.0039.01
-               result = 1 + 1 / (12 * a) + 1 / (288 * a * a);
-               result = log(result) - a + (a - 0.5f) * log(a) + log(boost::math::constants::root_two_pi<T>());
-               if(p_derivative)
-                  *p_derivative = exp(a * log(x) - x);
-            }
-            else
-            {
-               // This is method 2 below, done in logs, we're really outside the
-               // range of this method, but since the result is almost certainly
-               // infinite, we should probably be OK:
-               result = a * log(x) - x;
-               if(p_derivative)
-                  *p_derivative = exp(result);
-               T init_value = 0;
-               result += log(detail::lower_gamma_series(a, x, pol, init_value) / a);
-            }
-         }
-         else
-         {
-            result = log(result) + boost::math::lgamma(a, pol);
-         }
-      }
-      if(result > tools::log_max_value<T>())
-         return policies::raise_overflow_error<T>(function, nullptr, pol);
-      return exp(result);
-   }
-
   BOOST_MATH_ASSERT((p_derivative == nullptr) || normalised);

   bool is_int, is_half_int;
@@ -1297,7 +1353,7 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
   // series and continued fractions are slow to converge:
   //
   bool use_temme = false;
-   if(normalised && std::numeric_limits<T>::is_specialized && (a > 20))
+   if(normalised && boost::math::numeric_limits<T>::is_specialized && (a > 20))
   {
      T sigma = fabs((x-a)/a);
      if((a > 200) && (policies::digits<T, Policy>() <= 113))
@@ -1354,14 +1410,40 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
      {
         result = finite_gamma_q(a, x, pol, p_derivative);
         if(!normalised)
+         {
+            #ifdef BOOST_MATH_HAS_NVRTC
+            if (boost::math::is_same_v<T, float>)
+            {
+               result *= ::tgammaf(a);
+            }
+            else
+            {
+               result *= ::tgamma(a);
+            }
+            #else
            result *= boost::math::tgamma(a, pol);
+            #endif
+         }
         break;
      }
      case 1:
      {
         result = finite_half_gamma_q(a, x, p_derivative, pol);
         if(!normalised)
+         {
+            #ifdef BOOST_MATH_HAS_NVRTC
+            if (boost::math::is_same_v<T, float>)
+            {
+               result *= ::tgammaf(a);
+            }
+            else
+            {
+               result *= ::tgamma(a);
+            }
+            #else
            result *= boost::math::tgamma(a, pol);
+            #endif
+         }
         if(p_derivative && (*p_derivative == 0))
            *p_derivative = regularised_gamma_prefix(a, x, pol, lanczos_type());
         break;
      }
@@ -1390,7 +1472,19 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
        bool optimised_invert = false;
        if(invert)
        {
+           #ifdef BOOST_MATH_HAS_NVRTC
+           if (boost::math::is_same_v<T, float>)
+           {
+              init_value = (normalised ? 1 : ::tgammaf(a));
+           }
+           else
+           {
+              init_value = (normalised ? 1 : ::tgamma(a));
+           }
+           #else
           init_value = (normalised ? 1 : boost::math::tgamma(a, pol));
+           #endif
+
           if(normalised || (result >= 1) || (tools::max_value<T>() * result > init_value))
           {
              init_value /= result;
@@ -1447,14 +1541,14 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
        //
        typedef typename policies::precision<T, Policy>::type precision_type;

-        typedef std::integral_constant<int, ...> tag_type;
-        result = igamma_temme_large(a, x, pol, static_cast<tag_type const*>(nullptr));
+        typedef boost::math::integral_constant<int, ...> tag_type;
+        result = igamma_temme_large(a, x, pol, tag_type());

        if(x >= a)
           invert = !invert;
        if(p_derivative)
@@ -1473,7 +1567,18 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
#ifndef BOOST_MATH_NO_EXCEPTIONS
        try
        {
#endif
+           #ifdef BOOST_MATH_HAS_NVRTC
+           if (boost::math::is_same_v<T, float>)
+           {
+              result = ::powf(x, a) / ::tgammaf(a + 1);
+           }
+           else
+           {
+              result = ::pow(x, a) / ::tgamma(a + 1);
+           }
+           #else
           result = pow(x, a) / boost::math::tgamma(a + 1, pol);
+           #endif
#ifndef BOOST_MATH_NO_EXCEPTIONS
        }
        catch (const std::overflow_error&)
@@ -1505,7 +1610,19 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
        result = 1;
        if(invert)
        {
+           #ifdef BOOST_MATH_HAS_NVRTC
+           T gam;
+           if (boost::math::is_same_v<T, float>)
+           {
+              gam = normalised ? 1 : ::tgammaf(a);
+           }
+           else
+           {
+              gam = normalised ? 1 : ::tgamma(a);
+           }
+           #else
           T gam = normalised ? 1 : boost::math::tgamma(a, pol);
+           #endif
           result = gam - result;
        }
        if(p_derivative)
@@ -1525,36 +1642,109 @@ T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
   return result;
}

-//
-// Ratios of two gamma functions:
-//
-template <class T, class Policy, class Lanczos>
-T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const Lanczos& l)
+// Need to implement this dispatch to avoid recursion for device compilers
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED T gamma_incomplete_imp(T a, T x, bool normalised, bool invert,
+                                              const Policy& pol, T* p_derivative)
{
+   constexpr auto function = "boost::math::gamma_p<%1%>(%1%, %1%)";
+   if(a <= 0)
+      return policies::raise_domain_error<T>(function, "Argument a to the incomplete gamma function must be greater than zero (got a=%1%).", a, pol);
+   if(x < 0)
+      return policies::raise_domain_error<T>(function, "Argument x to the incomplete gamma function must be >= 0 (got x=%1%).", x, pol);
+
   BOOST_MATH_STD_USING
-   if(z < tools::epsilon<T>())
+
+   T result = 0; // Just to avoid warning C4701: potentially uninitialized local variable 'result' used
+
+   if(a >= max_factorial<T>::value && !normalised)
   {
      //
-      // We get spurious numeric overflow unless we're very careful, this
-      // can occur either inside Lanczos::lanczos_sum(z) or in the
-      // final combination of terms, to avoid this, split the product up
-      // into 2 (or 3) parts:
-      //
-      // G(z) / G(L) = 1 / (z * G(L)) ; z < eps, L = z + delta = delta
-      //    z * G(L) = z * G(lim) * (G(L)/G(lim)) ; lim = largest factorial
+      // When we're computing the non-normalized incomplete gamma
+      // and a is large the result is rather hard to compute unless
+      // we use logs. There are really two options - if x is a long
+      // way from a in value then we can reliably use methods 2 and 4
+      // below in logarithmic form and go straight to the result.
+      // Otherwise we let the regularized gamma take the strain
+      // (the result is unlikely to underflow in the central region anyway)
+      // and combine with lgamma in the hopes that we get a finite result.
+      //
-      if(boost::math::max_factorial<T>::value < delta)
+      if(invert && (a * 4 < x))
      {
-         T ratio = tgamma_delta_ratio_imp_lanczos(delta, T(boost::math::max_factorial<T>::value - delta), pol, l);
-         ratio *= z;
-         ratio *= boost::math::unchecked_factorial<T>(boost::math::max_factorial<T>::value - 1);
-         return 1 / ratio;
+         // This is method 4 below, done in logs:
+         result = a * log(x) - x;
+         if(p_derivative)
+            *p_derivative = exp(result);
+         result += log(upper_gamma_fraction(a, x, policies::get_epsilon<T, Policy>()));
+      }
+      else if(!invert && (a > 4 * x))
+      {
+         // This is method 2 below, done in logs:
+         result = a * log(x) - x;
+         if(p_derivative)
+            *p_derivative = exp(result);
+         T init_value = 0;
+         result += log(detail::lower_gamma_series(a, x, pol, init_value) / a);
      }
      else
      {
-         return 1 / (z * boost::math::tgamma(z + delta, pol));
+         result = gamma_incomplete_imp_final(T(a), T(x), true, invert, pol, p_derivative);
+         if(result == 0)
+         {
+            if(invert)
+            {
+               // Try http://functions.wolfram.com/06.06.06.0039.01
+               result = 1 + 1 / (12 * a) + 1 / (288 * a * a);
+               result = log(result) - a + (a - 0.5f) * log(a) + log(boost::math::constants::root_two_pi<T>());
+               if(p_derivative)
+                  *p_derivative = exp(a * log(x) - x);
+            }
+            else
+            {
+               // This is method 2 below, done in logs, we're really outside the
+               // range of this method, but since the result is almost certainly
+               // infinite, we should probably be OK:
+               result = a * log(x) - x;
+               if(p_derivative)
+                  *p_derivative = exp(result);
+               T init_value = 0;
+               result += log(detail::lower_gamma_series(a, x, pol, init_value) / a);
+            }
+         }
+         else
+         {
+            #ifdef BOOST_MATH_HAS_NVRTC
+            if (boost::math::is_same_v<T, float>)
+            {
+               result = ::logf(result) + ::lgammaf(a);
+            }
+            else
+            {
+               result = ::log(result) + ::lgamma(a);
+            }
+            #else
+            result = log(result) + boost::math::lgamma(a, pol);
+            #endif
+         }
      }
+      if(result > tools::log_max_value<T>())
+         return policies::raise_overflow_error<T>(function, nullptr, pol);
+      return exp(result);
   }
+
+   // If no special handling is required then we proceed as normal
+   return gamma_incomplete_imp_final(T(a), T(x), normalised, invert, pol, p_derivative);
+}
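gamma_incomplete_imp and its _final worker serve all four public incomplete gamma entry points, selected by the normalised/invert flags. For orientation, the mapping in host-side terms (values illustrative, API is Boost.Math's):

    // The four public faces of gamma_incomplete_imp:
    #include <boost/math/special_functions/gamma.hpp>

    double a = 3.0, x = 2.0;
    double full_upper = boost::math::tgamma(a, x);       // tgamma(a, x)
    double full_lower = boost::math::tgamma_lower(a, x); // tgamma(a) - tgamma(a, x)
    double reg_upper  = boost::math::gamma_q(a, x);      // Q(a, x) = tgamma(a, x) / tgamma(a)
    double reg_lower  = boost::math::gamma_p(a, x);      // P(a, x) = 1 - Q(a, x)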
+//
+// Ratios of two gamma functions:
+//
+template <class T, class Policy, class Lanczos>
+BOOST_MATH_GPU_ENABLED T tgamma_delta_ratio_imp_lanczos_final(T z, T delta, const Policy& pol, const Lanczos&)
+{
+   BOOST_MATH_STD_USING
+
   T zgh = static_cast<T>(z + T(Lanczos::g()) - constants::half<T>());
   T result;
   if(z + delta == z)
@@ -1588,9 +1778,55 @@ T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const Lanczos&
   result *= pow(T(constants::e<T>() / (zgh + delta)), delta);
   return result;
}
+
+template <class T, class Policy, class Lanczos>
+BOOST_MATH_GPU_ENABLED T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const Lanczos& l)
+{
+   BOOST_MATH_STD_USING
+
+   if(z < tools::epsilon<T>())
+   {
+      //
+      // We get spurious numeric overflow unless we're very careful, this
+      // can occur either inside Lanczos::lanczos_sum(z) or in the
+      // final combination of terms, to avoid this, split the product up
+      // into 2 (or 3) parts:
+      //
+      // G(z) / G(L) = 1 / (z * G(L)) ; z < eps, L = z + delta = delta
+      //    z * G(L) = z * G(lim) * (G(L)/G(lim)) ; lim = largest factorial
+      //
+      if(boost::math::max_factorial<T>::value < delta)
+      {
+         T ratio = tgamma_delta_ratio_imp_lanczos_final(T(delta), T(boost::math::max_factorial<T>::value - delta), pol, l);
+         ratio *= z;
+         ratio *= boost::math::unchecked_factorial<T>(boost::math::max_factorial<T>::value - 1);
+         return 1 / ratio;
+      }
+      else
+      {
+         #ifdef BOOST_MATH_HAS_NVRTC
+         if (boost::math::is_same_v<T, float>)
+         {
+            return 1 / (z * ::tgammaf(z + delta));
+         }
+         else
+         {
+            return 1 / (z * ::tgamma(z + delta));
+         }
+         #else
+         return 1 / (z * boost::math::tgamma(z + delta, pol));
+         #endif
+      }
+   }
+
+   return tgamma_delta_ratio_imp_lanczos_final(T(z), T(delta), pol, l);
+}
+
//
// And again without Lanczos support this time:
//
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+
template <class T, class Policy>
T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const lanczos::undefined_lanczos& l)
{
@@ -1647,15 +1883,28 @@ T tgamma_delta_ratio_imp_lanczos(T z, T delta, const Policy& pol, const lanczos:
   return ratio;
}

+#endif
+
template <class T, class Policy>
-T tgamma_delta_ratio_imp(T z, T delta, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T tgamma_delta_ratio_imp(T z, T delta, const Policy& pol)
{
   BOOST_MATH_STD_USING

   if((z <= 0) || (z + delta <= 0))
   {
      // This isn't very sophisticated, or accurate, but it does work:
+      #ifdef BOOST_MATH_HAS_NVRTC
+      if (boost::math::is_same_v<T, float>)
+      {
+         return ::tgammaf(z) / ::tgammaf(z + delta);
+      }
+      else
+      {
+         return ::tgamma(z) / ::tgamma(z + delta);
+      }
+      #else
      return boost::math::tgamma(z, pol) / boost::math::tgamma(z + delta, pol);
+      #endif
   }

   if(floor(delta) == delta)
@@ -1706,7 +1955,7 @@ T tgamma_delta_ratio_imp(T z, T delta, const Policy& pol)
}

template <class T, class Policy>
-T tgamma_ratio_imp(T x, T y, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T tgamma_ratio_imp(T x, T y, const Policy& pol)
{
   BOOST_MATH_STD_USING

@@ -1715,17 +1964,32 @@ T tgamma_ratio_imp(T x, T y, const Policy& pol)
   if((y <= 0) || (boost::math::isinf)(y))
      return policies::raise_domain_error<T>("boost::math::tgamma_ratio<%1%>(%1%, %1%)", "Gamma function ratios only implemented for positive arguments (got b=%1%).", y, pol);

+   // We don't need to worry about the denorm case on device
+   // And this has the added bonus of removing recursion
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
   if(x <= tools::min_value<T>())
   {
      // Special case for denorms...Ugh.
      T shift = ldexp(T(1), tools::digits<T>());
      return shift * tgamma_ratio_imp(T(x * shift), y, pol);
   }
+   #endif

   if((x < max_factorial<T>::value) && (y < max_factorial<T>::value))
   {
      // Rather than subtracting values, lets just call the gamma functions directly:
+      #ifdef BOOST_MATH_HAS_NVRTC
+      if (boost::math::is_same_v<T, float>)
+      {
+         return ::tgammaf(x) / ::tgammaf(y);
+      }
+      else
+      {
+         return ::tgamma(x) / ::tgamma(y);
+      }
+      #else
      return boost::math::tgamma(x, pol) / boost::math::tgamma(y, pol);
+      #endif
   }
   T prefix = 1;
   if(x < 1)
@@ -1741,12 +2005,35 @@ T tgamma_ratio_imp(T x, T y, const Policy& pol)
         y -= 1;
         prefix /= y;
      }
+
+      #ifdef BOOST_MATH_HAS_NVRTC
+      if (boost::math::is_same_v<T, float>)
+      {
+         return prefix * ::tgammaf(x) / ::tgammaf(y);
+      }
+      else
+      {
+         return prefix * ::tgamma(x) / ::tgamma(y);
+      }
+      #else
      return prefix * boost::math::tgamma(x, pol) / boost::math::tgamma(y, pol);
+      #endif
   }
   //
   // result is almost certainly going to underflow to zero, try logs just in case:
   //
+   #ifdef BOOST_MATH_HAS_NVRTC
+   if (boost::math::is_same_v<T, float>)
+   {
+      return ::expf(::lgammaf(x) - ::lgammaf(y));
+   }
+   else
+   {
+      return ::exp(::lgamma(x) - ::lgamma(y));
+   }
+   #else
   return exp(boost::math::lgamma(x, pol) - boost::math::lgamma(y, pol));
+   #endif
   }
   if(y < 1)
   {
@@ -1761,21 +2048,48 @@ T tgamma_ratio_imp(T x, T y, const Policy& pol)
        x -= 1;
        prefix *= x;
     }
+
+      #ifdef BOOST_MATH_HAS_NVRTC
+      if (boost::math::is_same_v<T, float>)
+      {
+         return prefix * ::tgammaf(x) / ::tgammaf(y);
+      }
+      else
+      {
+         return prefix * ::tgamma(x) / ::tgamma(y);
+      }
+      #else
      return prefix * boost::math::tgamma(x, pol) / boost::math::tgamma(y, pol);
+      #endif
   }
   //
   // Result will almost certainly overflow, try logs just in case:
   //
+   #ifdef BOOST_MATH_HAS_NVRTC
+   if (boost::math::is_same_v<T, float>)
+   {
+      return ::expf(::lgammaf(x) - ::lgammaf(y));
+   }
+   else
+   {
+      return ::exp(::lgamma(x) - ::lgamma(y));
+   }
+   #else
   return exp(boost::math::lgamma(x, pol) - boost::math::lgamma(y, pol));
+   #endif
}
//
// Regular case, x and y both large and similar in magnitude:
//
+   #ifdef BOOST_MATH_HAS_NVRTC
+   return detail::tgamma_delta_ratio_imp(x, y - x, pol);
+   #else
   return boost::math::tgamma_delta_ratio(x, y - x, pol);
+   #endif
}

template <class T, class Policy>
-T gamma_p_derivative_imp(T a, T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED T gamma_p_derivative_imp(T a, T x, const Policy& pol)
{
   BOOST_MATH_STD_USING
   //
@@ -1806,7 +2120,18 @@ T gamma_p_derivative_imp(T a, T x, const Policy& pol)
   if(f1 == 0)
   {
      // Underflow in calculation, use logs instead:
+      #ifdef BOOST_MATH_HAS_NVRTC
+      if (boost::math::is_same_v<T, float>)
+      {
+         f1 = a * ::logf(x) - x - ::lgammaf(a) - ::logf(x);
+      }
+      else
+      {
+         f1 = a * ::log(x) - x - ::lgamma(a) - ::log(x);
+      }
+      #else
      f1 = a * log(x) - x - lgamma(a, pol) - log(x);
+      #endif
      f1 = exp(f1);
   }
   else
@@ -1816,8 +2141,8 @@ T gamma_p_derivative_imp(T a, T x, const Policy& pol)
}

template <class T, class Policy>
-inline typename tools::promote_args<T>::type
-   tgamma(T z, const Policy& /* pol */, const std::true_type)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
+   tgamma(T z, const Policy& /* pol */, const boost::math::true_type)
{
   BOOST_FPU_EXCEPTION_GUARD
   typedef typename tools::promote_args<T>::type result_type;
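gamma_p_derivative_imp above evaluates d/dx P(a, x) = x^(a-1) e^(-x) / tgamma(a); the log-space fallback it switches to on underflow is exp(a*log(x) - x - lgamma(a) - log(x)). A quick host-side check of the identity (not part of the diff):

    // d/dx P(a, x) = x^(a-1) * exp(-x) / tgamma(a); for a = 3, x = 2 this is
    // 4 * exp(-2) / 2 = 2 * exp(-2) ~= 0.27067.
    #include <boost/math/special_functions/gamma.hpp>
    #include <cmath>

    double d        = boost::math::gamma_p_derivative(3.0, 2.0);
    double expected = std::pow(2.0, 2.0) * std::exp(-2.0) / boost::math::tgamma(3.0);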
call // our initialization code here as the precision presumably // varies at runtime, and will not have been set yet. Plus the // code requiring initialization isn't called when digits == 0. - if(std::numeric_limits::digits) + if (boost::math::numeric_limits::digits) { boost::math::gamma_p(static_cast(400), static_cast(400), Policy()); } } - static void do_init(const std::integral_constant&){} - void force_instantiate()const{} + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&){} + BOOST_MATH_GPU_ENABLED void force_instantiate()const{} }; - static const init initializer; - static void force_instantiate() + BOOST_MATH_STATIC const init initializer; + BOOST_MATH_GPU_ENABLED static void force_instantiate() { + #ifndef BOOST_MATH_HAS_GPU_SUPPORT initializer.force_instantiate(); + #endif } }; @@ -1880,10 +2207,10 @@ struct lgamma_initializer { struct init { - init() + BOOST_MATH_GPU_ENABLED init() { typedef typename policies::precision::type precision_type; - typedef std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { boost::math::lgamma(static_cast(2.5), Policy()); boost::math::lgamma(static_cast(1.25), Policy()); boost::math::lgamma(static_cast(1.75), Policy()); } - static void do_init(const std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { boost::math::lgamma(static_cast(2.5), Policy()); boost::math::lgamma(static_cast(1.25), Policy()); boost::math::lgamma(static_cast(1.5), Policy()); boost::math::lgamma(static_cast(1.75), Policy()); } - static void do_init(const std::integral_constant&) + BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant&) { } - void force_instantiate()const{} + BOOST_MATH_GPU_ENABLED void force_instantiate()const{} }; - static const init initializer; - static void force_instantiate() + BOOST_MATH_STATIC const init initializer; + BOOST_MATH_GPU_ENABLED static void force_instantiate() { + #ifndef BOOST_MATH_HAS_GPU_SUPPORT initializer.force_instantiate(); + #endif } }; @@ -1920,8 +2249,8 @@ template const typename lgamma_initializer::init lgamma_initializer::initializer; template -inline tools::promote_args_t - tgamma(T1 a, T2 z, const Policy&, const std::false_type) +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t + tgamma(T1 a, T2 z, const Policy&, const boost::math::false_type) { BOOST_FPU_EXCEPTION_GUARD typedef tools::promote_args_t result_type; @@ -1943,8 +2272,8 @@ inline tools::promote_args_t } template -inline tools::promote_args_t - tgamma(T1 a, T2 z, const std::false_type& tag) +BOOST_MATH_GPU_ENABLED inline tools::promote_args_t + tgamma(T1 a, T2 z, const boost::math::false_type& tag) { return tgamma(a, z, policies::policy<>(), tag); } @@ -1952,15 +2281,8 @@ inline tools::promote_args_t } // namespace detail -template -inline typename tools::promote_args::type - tgamma(T z) -{ - return tgamma(z, policies::policy<>()); -} - template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type lgamma(T z, int* sign, const Policy&) { BOOST_FPU_EXCEPTION_GUARD @@ -1980,28 +2302,28 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type lgamma(T z, int* sign) { return lgamma(z, sign, policies::policy<>()); } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type 
   lgamma(T x, const Policy& pol)
 {
    return ::boost::math::lgamma(x, nullptr, pol);
 }

 template <class T>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    lgamma(T x)
 {
    return ::boost::math::lgamma(x, nullptr, policies::policy<>());
 }

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    tgamma1pm1(T z, const Policy& /* pol */)
 {
    BOOST_FPU_EXCEPTION_GUARD
@@ -2015,11 +2337,11 @@ inline typename tools::promote_args<T>::type
       policies::discrete_quantile<>,
       policies::assert_undefined<> >::type forwarding_policy;

-   return policies::checked_narrowing_cast::type, forwarding_policy>(detail::tgammap1m1_imp(static_cast(z), forwarding_policy(), evaluation_type()), "boost::math::tgamma1pm1<%!%>(%1%)");
+   return policies::checked_narrowing_cast::type, forwarding_policy>(detail::tgammap1m1_imp(static_cast(z), forwarding_policy(), evaluation_type()), "boost::math::tgamma1pm1<%1%>(%1%)");
 }

 template <class T>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    tgamma1pm1(T z)
 {
    return tgamma1pm1(z, policies::policy<>());
@@ -2029,7 +2351,7 @@ inline typename tools::promote_args<T>::type
 //
 // Full upper incomplete gamma:
 //
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma(T1 a, T2 z)
 {
    //
@@ -2041,17 +2363,23 @@ inline tools::promote_args_t<T1, T2>
    return static_cast<result_type>(detail::tgamma(a, z, maybe_policy()));
 }
 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma(T1 a, T2 z, const Policy& pol)
 {
    using result_type = tools::promote_args_t<T1, T2>;
-   return static_cast<result_type>(detail::tgamma(a, z, pol, std::false_type()));
+   return static_cast<result_type>(detail::tgamma(a, z, pol, boost::math::false_type()));
+}
+template <class T>
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
+   tgamma(T z)
+{
+   return tgamma(z, policies::policy<>());
 }
 //
 // Full lower incomplete gamma:
 //
 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma_lower(T1 a, T2 z, const Policy&)
 {
    BOOST_FPU_EXCEPTION_GUARD
@@ -2073,7 +2401,7 @@ inline tools::promote_args_t<T1, T2>
       forwarding_policy(), static_cast<value_type*>(nullptr)), "tgamma_lower<%1%>(%1%, %1%)");
 }
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma_lower(T1 a, T2 z)
 {
    return tgamma_lower(a, z, policies::policy<>());
@@ -2082,7 +2410,7 @@ inline tools::promote_args_t<T1, T2>
 //
 // Regularised upper incomplete gamma:
 //
 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    gamma_q(T1 a, T2 z, const Policy& /* pol */)
 {
    BOOST_FPU_EXCEPTION_GUARD
@@ -2104,7 +2432,7 @@ inline tools::promote_args_t<T1, T2>
       forwarding_policy(), static_cast<value_type*>(nullptr)), "gamma_q<%1%>(%1%, %1%)");
 }
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    gamma_q(T1 a, T2 z)
 {
    return gamma_q(a, z, policies::policy<>());
@@ -2113,7 +2441,7 @@ inline tools::promote_args_t<T1, T2>
 //
 // Regularised lower incomplete gamma:
 //
 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    gamma_p(T1 a, T2 z, const Policy&)
 {
    BOOST_FPU_EXCEPTION_GUARD
@@ -2135,7 +2463,7 @@ inline tools::promote_args_t<T1, T2>
       forwarding_policy(), static_cast<value_type*>(nullptr)), "gamma_p<%1%>(%1%, %1%)");
 }
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    gamma_p(T1 a, T2 z)
 {
    return gamma_p(a, z, policies::policy<>());
@@ -2143,7 +2471,7 @@ inline
tools::promote_args_t<T1, T2>

 // ratios of gamma functions:
 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma_delta_ratio(T1 z, T2 delta, const Policy& /* pol */)
 {
    BOOST_FPU_EXCEPTION_GUARD
@@ -2159,13 +2487,13 @@ inline tools::promote_args_t<T1, T2>
    return policies::checked_narrowing_cast<result_type, forwarding_policy>(detail::tgamma_delta_ratio_imp(static_cast<value_type>(z), static_cast<value_type>(delta), forwarding_policy()), "boost::math::tgamma_delta_ratio<%1%>(%1%, %1%)");
 }
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma_delta_ratio(T1 z, T2 delta)
 {
    return tgamma_delta_ratio(z, delta, policies::policy<>());
 }
 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma_ratio(T1 a, T2 b, const Policy&)
 {
    typedef tools::promote_args_t<T1, T2> result_type;
@@ -2180,14 +2508,14 @@ inline tools::promote_args_t<T1, T2>
    return policies::checked_narrowing_cast<result_type, forwarding_policy>(detail::tgamma_ratio_imp(static_cast<value_type>(a), static_cast<value_type>(b), forwarding_policy()), "boost::math::tgamma_delta_ratio<%1%>(%1%, %1%)");
 }
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    tgamma_ratio(T1 a, T2 b)
 {
    return tgamma_ratio(a, b, policies::policy<>());
 }

 template <class T1, class T2, class Policy>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    gamma_p_derivative(T1 a, T2 x, const Policy&)
 {
    BOOST_FPU_EXCEPTION_GUARD
@@ -2203,7 +2531,7 @@ inline tools::promote_args_t<T1, T2>
    return policies::checked_narrowing_cast<result_type, forwarding_policy>(detail::gamma_p_derivative_imp(static_cast<value_type>(a), static_cast<value_type>(x), forwarding_policy()), "boost::math::gamma_p_derivative<%1%>(%1%, %1%)");
 }
 template <class T1, class T2>
-inline tools::promote_args_t<T1, T2>
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T1, T2>
    gamma_p_derivative(T1 a, T2 x)
 {
    return gamma_p_derivative(a, x, policies::policy<>());
diff --git a/include/boost/math/special_functions/gegenbauer.hpp b/include/boost/math/special_functions/gegenbauer.hpp
index b7033cd14f..70324cf656 100644
--- a/include/boost/math/special_functions/gegenbauer.hpp
+++ b/include/boost/math/special_functions/gegenbauer.hpp
@@ -1,4 +1,5 @@
 // (C) Copyright Nick Thompson 2019.
+// (C) Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -6,21 +7,25 @@
 #ifndef BOOST_MATH_SPECIAL_GEGENBAUER_HPP
 #define BOOST_MATH_SPECIAL_GEGENBAUER_HPP

-#include
+#include
+#include
+#include
+
+#ifndef BOOST_MATH_NO_EXCEPTIONS
 #include
-#include
+#endif

 namespace boost { namespace math {

 template <typename Real>
-Real gegenbauer(unsigned n, Real lambda, Real x)
+BOOST_MATH_GPU_ENABLED Real gegenbauer(unsigned n, Real lambda, Real x)
 {
-    static_assert(!std::is_integral<Real>::value, "Gegenbauer polynomials required floating point arguments.");
+    static_assert(!boost::math::is_integral<Real>::value, "Gegenbauer polynomials require floating point arguments.");
     if (lambda <= -1/Real(2)) {
 #ifndef BOOST_MATH_NO_EXCEPTIONS
         throw std::domain_error("lambda > -1/2 is required.");
 #else
-        return std::numeric_limits<Real>::quiet_NaN();
+        return boost::math::numeric_limits<Real>::quiet_NaN();
 #endif
     }
     // The only reason to do this is because of some instability that could be present for x < 0 that is not present for x > 0.
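// [Editorial aside: a minimal sketch, illustration only, not part of the patch.]
// The loop that follows evaluates the Gegenbauer polynomial C_n^lambda(x) with
// the standard three-term recurrence
//    C_0 = 1,   C_1 = 2*lambda*x,
//    k*C_k = 2*(k - 1 + lambda)*x*C_{k-1} - (k - 2 + 2*lambda)*C_{k-2},
// which needs no std:: facilities and is therefore GPU-friendly. A standalone
// double-precision version of just the recurrence (function name hypothetical):
double gegenbauer_by_recurrence(unsigned n, double lambda, double x)
{
   if (n == 0) { return 1.0; }
   double ckm1 = 1.0;              // C_0
   double ck   = 2.0 * lambda * x; // C_1
   for (unsigned k = 2; k <= n; ++k)
   {
      // k*C_k = 2*(k - 1 + lambda)*x*C_{k-1} - (k - 2 + 2*lambda)*C_{k-2}
      const double ckp1 = (2.0 * (k - 1.0 + lambda) * x * ck
                           - (k - 2.0 + 2.0 * lambda) * ckm1) / k;
      ckm1 = ck;
      ck   = ckp1;
   }
   return ck;
}
// Quick check: n == 2 yields 2*lambda*(1 + lambda)*x*x - lambda, the textbook C_2.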
@@ -41,7 +46,7 @@ Real gegenbauer(unsigned n, Real lambda, Real x) Real yk = y1; Real k = 2; - Real k_max = n*(1+std::numeric_limits::epsilon()); + Real k_max = n*(1+boost::math::numeric_limits::epsilon()); Real gamma = 2*(lambda - 1); while(k < k_max) { @@ -55,7 +60,7 @@ Real gegenbauer(unsigned n, Real lambda, Real x) template -Real gegenbauer_derivative(unsigned n, Real lambda, Real x, unsigned k) +BOOST_MATH_GPU_ENABLED Real gegenbauer_derivative(unsigned n, Real lambda, Real x, unsigned k) { if (k > n) { return Real(0); @@ -70,7 +75,7 @@ Real gegenbauer_derivative(unsigned n, Real lambda, Real x, unsigned k) } template -Real gegenbauer_prime(unsigned n, Real lambda, Real x) { +BOOST_MATH_GPU_ENABLED Real gegenbauer_prime(unsigned n, Real lambda, Real x) { return gegenbauer_derivative(n, lambda, x, 1); } diff --git a/include/boost/math/special_functions/hankel.hpp b/include/boost/math/special_functions/hankel.hpp index 51b8390d99..730c7afa03 100644 --- a/include/boost/math/special_functions/hankel.hpp +++ b/include/boost/math/special_functions/hankel.hpp @@ -1,4 +1,5 @@ // Copyright John Maddock 2012. +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. // (See accompanying file LICENSE_1_0.txt @@ -7,26 +8,31 @@ #ifndef BOOST_MATH_HANKEL_HPP #define BOOST_MATH_HANKEL_HPP +#include +#include #include #include +#include +#include +#include namespace boost{ namespace math{ namespace detail{ template -std::complex hankel_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol, int sign) +BOOST_MATH_GPU_ENABLED boost::math::complex hankel_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol, int sign) { BOOST_MATH_STD_USING - static const char* function = "boost::math::cyl_hankel_1<%1%>(%1%,%1%)"; + constexpr auto function = "boost::math::cyl_hankel_1<%1%>(%1%,%1%)"; if(x < 0) { bool isint_v = floor(v) == v; T j, y; bessel_jy(v, -x, &j, &y, need_j | need_y, pol); - std::complex cx(x), cv(v); - std::complex j_result, y_result; + boost::math::complex cx(x), cv(v); + boost::math::complex j_result, y_result; if(isint_v) { int s = (iround(v) & 1) ? 
-1 : 1; @@ -37,12 +43,12 @@ std::complex hankel_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol { j_result = pow(cx, v) * pow(-cx, -v) * j; T p1 = pow(-x, v); - std::complex p2 = pow(cx, v); + boost::math::complex p2 = pow(cx, v); y_result = p1 * y / p2 + (p2 / p1 - p1 / p2) * j / tan(constants::pi() * v); } // multiply y_result by i: - y_result = std::complex(-sign * y_result.imag(), sign * y_result.real()); + y_result = boost::math::complex(-sign * y_result.imag(), sign * y_result.real()); return j_result + y_result; } @@ -51,25 +57,25 @@ std::complex hankel_imp(T v, T x, const bessel_no_int_tag&, const Policy& pol if(v == 0) { // J is 1, Y is -INF - return std::complex(1, sign * -policies::raise_overflow_error(function, nullptr, pol)); + return boost::math::complex(1, sign * -policies::raise_overflow_error(function, nullptr, pol)); } else { // At least one of J and Y is complex infinity: - return std::complex(policies::raise_overflow_error(function, nullptr, pol), sign * policies::raise_overflow_error(function, nullptr, pol)); + return boost::math::complex(policies::raise_overflow_error(function, nullptr, pol), sign * policies::raise_overflow_error(function, nullptr, pol)); } } T j, y; bessel_jy(v, x, &j, &y, need_j | need_y, pol); - return std::complex(j, sign * y); + return boost::math::complex(j, sign * y); } template -std::complex hankel_imp(int v, T x, const bessel_int_tag&, const Policy& pol, int sign); +BOOST_MATH_GPU_ENABLED boost::math::complex hankel_imp(int v, T x, const bessel_int_tag&, const Policy& pol, int sign); template -inline std::complex hankel_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol, int sign) +BOOST_MATH_GPU_ENABLED inline boost::math::complex hankel_imp(T v, T x, const bessel_maybe_int_tag&, const Policy& pol, int sign) { BOOST_MATH_STD_USING // ADL of std names. 
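// [Editorial aside: a minimal sketch, illustration only, not part of the patch.]
// The sign argument threaded through hankel_imp records which Hankel function
// gets assembled from the Bessel pair that bessel_jy computes:
//    H1_v(x) = J_v(x) + i*Y_v(x)   (sign = +1, cyl_hankel_1)
//    H2_v(x) = J_v(x) - i*Y_v(x)   (sign = -1, cyl_hankel_2)
// Host-only sketch using the C++17 <cmath> special functions (note these are
// not shipped by every standard library; libc++, for example, lacks them):
#include <cmath>
#include <complex>

std::complex<double> hankel_from_bessel(double v, double x, int sign)
{
   // Valid for x > 0: J_v supplies the real part, +/-Y_v the imaginary part.
   return std::complex<double>(std::cyl_bessel_j(v, x), sign * std::cyl_neumann(v, x));
}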
   int ival = detail::iconv(v, pol);
@@ -81,57 +87,57 @@ inline std::complex<T> hankel_imp(T v, T x, const bessel_maybe_int_tag&, const P
 }

 template <class T, class Policy>
-inline std::complex<T> hankel_imp(int v, T x, const bessel_int_tag&, const Policy& pol, int sign)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<T> hankel_imp(int v, T x, const bessel_int_tag&, const Policy& pol, int sign)
 {
    BOOST_MATH_STD_USING
-   if((std::abs(v) < 200) && (x > 0))
-      return std::complex<T>(bessel_jn(v, x, pol), sign * bessel_yn(v, x, pol));
+   if((abs(v) < 200) && (x > 0))
+      return boost::math::complex<T>(bessel_jn(v, x, pol), sign * bessel_yn(v, x, pol));
    return hankel_imp(static_cast<T>(v), x, bessel_no_int_tag(), pol, sign);
 }

 template <class T, class Policy>
-inline std::complex<T> sph_hankel_imp(T v, T x, const Policy& pol, int sign)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<T> sph_hankel_imp(T v, T x, const Policy& pol, int sign)
 {
    BOOST_MATH_STD_USING
-   return constants::root_half_pi<T>() * hankel_imp(v + 0.5f, x, bessel_no_int_tag(), pol, sign) / sqrt(std::complex<T>(x));
+   return constants::root_half_pi<T>() * hankel_imp(v + 0.5f, x, bessel_no_int_tag(), pol, sign) / sqrt(boost::math::complex<T>(x));
 }

 } // namespace detail

 template <class T1, class T2, class Policy>
-inline std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_1(T1 v, T2 x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_1(T1 v, T2 x, const Policy& pol)
 {
    BOOST_FPU_EXCEPTION_GUARD
    typedef typename detail::bessel_traits<T1, T2, Policy>::result_type result_type;
    typedef typename detail::bessel_traits<T1, T2, Policy>::optimisation_tag tag_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
-   return policies::checked_narrowing_cast<std::complex<result_type>, Policy>(detail::hankel_imp(v, static_cast<value_type>(x), tag_type(), pol, 1), "boost::math::cyl_hankel_1<%1%>(%1%,%1%)");
+   return policies::checked_narrowing_cast<boost::math::complex<result_type>, Policy>(detail::hankel_imp(v, static_cast<value_type>(x), tag_type(), pol, 1), "boost::math::cyl_hankel_1<%1%>(%1%,%1%)");
 }

 template <class T1, class T2>
-inline std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_1(T1 v, T2 x)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_1(T1 v, T2 x)
 {
    return cyl_hankel_1(v, x, policies::policy<>());
 }

 template <class T1, class T2, class Policy>
-inline std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_2(T1 v, T2 x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_2(T1 v, T2 x, const Policy& pol)
 {
    BOOST_FPU_EXCEPTION_GUARD
    typedef typename detail::bessel_traits<T1, T2, Policy>::result_type result_type;
    typedef typename detail::bessel_traits<T1, T2, Policy>::optimisation_tag tag_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
-   return policies::checked_narrowing_cast<std::complex<result_type>, Policy>(detail::hankel_imp(v, static_cast<value_type>(x), tag_type(), pol, -1), "boost::math::cyl_hankel_1<%1%>(%1%,%1%)");
+   return policies::checked_narrowing_cast<boost::math::complex<result_type>, Policy>(detail::hankel_imp(v, static_cast<value_type>(x), tag_type(), pol, -1), "boost::math::cyl_hankel_2<%1%>(%1%,%1%)");
 }

 template <class T1, class T2>
-inline std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_2(T1 v, T2 x)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_2(T1 v, T2 x)
 {
    return cyl_hankel_2(v, x, policies::policy<>());
 }

 template <class T1, class T2, class Policy>
-inline std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_1(T1 v, T2 x, const Policy&)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_1(T1 v, T2 x, const Policy&)
 {
    BOOST_FPU_EXCEPTION_GUARD
    typedef typename detail::bessel_traits<T1, T2, Policy>::result_type result_type;
@@ -143,17 +149,17 @@ inline std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type>
       policies::discrete_quantile<>,
       policies::assert_undefined<> >::type forwarding_policy;
-   return policies::checked_narrowing_cast<std::complex<result_type>,
Policy>(detail::sph_hankel_imp(static_cast<value_type>(v), static_cast<value_type>(x), forwarding_policy(), 1), "boost::math::sph_hankel_1<%1%>(%1%,%1%)");
+   return policies::checked_narrowing_cast<boost::math::complex<result_type>, Policy>(detail::sph_hankel_imp(static_cast<value_type>(v), static_cast<value_type>(x), forwarding_policy(), 1), "boost::math::sph_hankel_1<%1%>(%1%,%1%)");
 }

 template <class T1, class T2>
-inline std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_1(T1 v, T2 x)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_1(T1 v, T2 x)
 {
    return sph_hankel_1(v, x, policies::policy<>());
 }

 template <class T1, class T2, class Policy>
-inline std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_2(T1 v, T2 x, const Policy&)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_2(T1 v, T2 x, const Policy&)
 {
    BOOST_FPU_EXCEPTION_GUARD
    typedef typename detail::bessel_traits<T1, T2, Policy>::result_type result_type;
@@ -165,11 +171,11 @@ inline std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type>
       policies::discrete_quantile<>,
       policies::assert_undefined<> >::type forwarding_policy;
-   return policies::checked_narrowing_cast<std::complex<result_type>, Policy>(detail::sph_hankel_imp(static_cast<value_type>(v), static_cast<value_type>(x), forwarding_policy(), -1), "boost::math::sph_hankel_1<%1%>(%1%,%1%)");
+   return policies::checked_narrowing_cast<boost::math::complex<result_type>, Policy>(detail::sph_hankel_imp(static_cast<value_type>(v), static_cast<value_type>(x), forwarding_policy(), -1), "boost::math::sph_hankel_2<%1%>(%1%,%1%)");
 }

 template <class T1, class T2>
-inline std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_2(T1 v, T2 x)
+BOOST_MATH_GPU_ENABLED inline boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_2(T1 v, T2 x)
 {
    return sph_hankel_2(v, x, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/hermite.hpp b/include/boost/math/special_functions/hermite.hpp
index 81ccb2ac66..3d77fc03e3 100644
--- a/include/boost/math/special_functions/hermite.hpp
+++ b/include/boost/math/special_functions/hermite.hpp
@@ -1,5 +1,6 @@
 // (C) Copyright John Maddock 2006.
+// (C) Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -11,8 +12,9 @@ #pragma once #endif -#include #include +#include +#include #include namespace boost{ @@ -20,7 +22,7 @@ namespace math{ // Recurrence relation for Hermite polynomials: template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type hermite_next(unsigned n, T1 x, T2 Hn, T3 Hnm1) { using promoted_type = tools::promote_args_t; @@ -31,7 +33,7 @@ namespace detail{ // Implement Hermite polynomials via recurrence: template -T hermite_imp(unsigned n, T x) +BOOST_MATH_GPU_ENABLED T hermite_imp(unsigned n, T x) { T p0 = 1; T p1 = 2 * x; @@ -43,7 +45,7 @@ T hermite_imp(unsigned n, T x) while(c < n) { - std::swap(p0, p1); + BOOST_MATH_GPU_SAFE_SWAP(p0, p1); p1 = static_cast(hermite_next(c, x, p0, p1)); ++c; } @@ -53,7 +55,7 @@ T hermite_imp(unsigned n, T x) } // namespace detail template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type hermite(unsigned n, T x, const Policy&) { typedef typename tools::promote_args::type result_type; @@ -62,7 +64,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type hermite(unsigned n, T x) { return boost::math::hermite(n, x, policies::policy<>()); diff --git a/include/boost/math/special_functions/heuman_lambda.hpp b/include/boost/math/special_functions/heuman_lambda.hpp index 0fbf4a9803..05002725f2 100644 --- a/include/boost/math/special_functions/heuman_lambda.hpp +++ b/include/boost/math/special_functions/heuman_lambda.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2015 John Maddock +// Copyright (c) 2024 Matt Borland // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,6 +11,9 @@ #pragma once #endif +#include +#include +#include #include #include #include @@ -26,13 +30,13 @@ namespace detail{ // Elliptic integral - Jacobi Zeta template -T heuman_lambda_imp(T phi, T k, const Policy& pol) +BOOST_MATH_GPU_ENABLED T heuman_lambda_imp(T phi, T k, const Policy& pol) { BOOST_MATH_STD_USING using namespace boost::math::tools; using namespace boost::math::constants; - const char* function = "boost::math::heuman_lambda<%1%>(%1%, %1%)"; + constexpr auto function = "boost::math::heuman_lambda<%1%>(%1%, %1%)"; if(fabs(k) > 1) return policies::raise_domain_error(function, "We require |k| <= 1 but got k = %1%", k, pol); @@ -51,10 +55,10 @@ T heuman_lambda_imp(T phi, T k, const Policy& pol) } else { - typedef std::integral_constant::value&& std::numeric_limits::digits && (std::numeric_limits::digits <= 54) ? 0 : - std::is_floating_point::value && std::numeric_limits::digits && (std::numeric_limits::digits <= 64) ? 1 : 2 - > precision_tag_type; + typedef boost::math::integral_constant::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 54) ? 0 : + boost::math::is_floating_point::value && boost::math::numeric_limits::digits && (boost::math::numeric_limits::digits <= 64) ? 
1 : 2 + > precision_tag_type; T rkp = sqrt(kp); T ratio; @@ -63,7 +67,9 @@ T heuman_lambda_imp(T phi, T k, const Policy& pol) return policies::raise_domain_error(function, "When 1-k^2 == 1 then phi must be < Pi/2, but got phi = %1%", phi, pol); } else + { ratio = ellint_f_imp(phi, rkp, pol, k2) / ellint_k_imp(rkp, pol, k2); + } result = ratio + ellint_k_imp(k, pol, precision_tag_type()) * jacobi_zeta_imp(phi, rkp, pol, k2) / constants::half_pi(); } return result; @@ -72,7 +78,7 @@ T heuman_lambda_imp(T phi, T k, const Policy& pol) } // detail template -inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -80,7 +86,7 @@ inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi, co } template -inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type heuman_lambda(T1 k, T2 phi) { return boost::math::heuman_lambda(k, phi, policies::policy<>()); } diff --git a/include/boost/math/special_functions/hypot.hpp b/include/boost/math/special_functions/hypot.hpp index c56c751102..f38e37e872 100644 --- a/include/boost/math/special_functions/hypot.hpp +++ b/include/boost/math/special_functions/hypot.hpp @@ -12,20 +12,20 @@ #include #include +#include +#include #include #include -#include // for swap -#include namespace boost{ namespace math{ namespace detail{ template -T hypot_imp(T x, T y, const Policy& pol) +BOOST_MATH_GPU_ENABLED T hypot_imp(T x, T y, const Policy& pol) { // // Normalize x and y, so that both are positive and x >= y: // - using std::fabs; using std::sqrt; // ADL of std names + BOOST_MATH_STD_USING x = fabs(x); y = fabs(y); @@ -35,16 +35,16 @@ T hypot_imp(T x, T y, const Policy& pol) #pragma warning(disable: 4127) #endif // special case, see C99 Annex F: - if(std::numeric_limits::has_infinity - && ((x == std::numeric_limits::infinity()) - || (y == std::numeric_limits::infinity()))) + if(boost::math::numeric_limits::has_infinity + && ((x == boost::math::numeric_limits::infinity()) + || (y == boost::math::numeric_limits::infinity()))) return policies::raise_overflow_error("boost::math::hypot<%1%>(%1%,%1%)", nullptr, pol); #ifdef _MSC_VER #pragma warning(pop) #endif if(y > x) - (std::swap)(x, y); + BOOST_MATH_GPU_SAFE_SWAP(x, y); if(x * tools::epsilon() >= y) return x; @@ -56,7 +56,7 @@ T hypot_imp(T x, T y, const Policy& pol) } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type hypot(T1 x, T2 y) { typedef typename tools::promote_args::type result_type; @@ -65,7 +65,7 @@ inline typename tools::promote_args::type } template -inline typename tools::promote_args::type +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type hypot(T1 x, T2 y, const Policy& pol) { typedef typename tools::promote_args::type result_type; diff --git a/include/boost/math/special_functions/jacobi_zeta.hpp b/include/boost/math/special_functions/jacobi_zeta.hpp index c4ba7d23d2..8b6f80912d 100644 --- a/include/boost/math/special_functions/jacobi_zeta.hpp +++ b/include/boost/math/special_functions/jacobi_zeta.hpp @@ -11,6 +11,8 @@ #pragma once #endif +#include +#include #include #include #include @@ -27,7 +29,7 @@ namespace detail{ // Elliptic integral - Jacobi Zeta template -T 
jacobi_zeta_imp(T phi, T k, const Policy& pol, T kp) +BOOST_MATH_GPU_ENABLED T jacobi_zeta_imp(T phi, T k, const Policy& pol, T kp) { BOOST_MATH_STD_USING using namespace boost::math::tools; @@ -55,14 +57,14 @@ T jacobi_zeta_imp(T phi, T k, const Policy& pol, T kp) return invert ? T(-result) : result; } template -inline T jacobi_zeta_imp(T phi, T k, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline T jacobi_zeta_imp(T phi, T k, const Policy& pol) { return jacobi_zeta_imp(phi, k, pol, T(1 - k * k)); } } // detail template -inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi, const Policy& pol) { typedef typename tools::promote_args::type result_type; typedef typename policies::evaluation::type value_type; @@ -70,7 +72,7 @@ inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi, cons } template -inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi) +BOOST_MATH_GPU_ENABLED inline typename tools::promote_args::type jacobi_zeta(T1 k, T2 phi) { return boost::math::jacobi_zeta(k, phi, policies::policy<>()); } diff --git a/include/boost/math/special_functions/lanczos.hpp b/include/boost/math/special_functions/lanczos.hpp index d75a968cdb..0ec24bddbf 100644 --- a/include/boost/math/special_functions/lanczos.hpp +++ b/include/boost/math/special_functions/lanczos.hpp @@ -11,12 +11,16 @@ #endif #include -#include #include +#include +#include +#include #include -#include -#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include #include +#endif #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -48,7 +52,7 @@ namespace boost{ namespace math{ namespace lanczos{ // Default version assumes all g() values are the same. 
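// [Editorial aside: a minimal sketch, illustration only, not part of the patch.]
// Each lanczos* struct below packages a rational-polynomial fit of the Lanczos
// partial sum S_g(z) for one fixed shift g (reported by the struct's g()
// member); the sqrt(2*pi) factor is folded into the coefficients. For modest
// z > 0, tgamma is then recovered essentially as
//    tgamma(z) ~ S_g(z) * pow(z + g - 0.5, z - 0.5) * exp(-(z + g - 0.5)),
// see gamma_imp in gamma.hpp for the overflow-hardened version. Sketch:
#include <cmath>
#include <boost/math/special_functions/lanczos.hpp>

double tgamma_via_lanczos(double z) // assumes modest z > 0, no overflow handling
{
   using L = boost::math::lanczos::lanczos13m53; // the double-precision fit
   const double zgh = z + L::g() - 0.5;
   return L::lanczos_sum(z) * std::pow(zgh, z - 0.5) * std::exp(-zgh);
}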
// template -inline double lanczos_g_near_1_and_2(const L&) +BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const L&) { return L::g(); } @@ -59,17 +63,17 @@ inline double lanczos_g_near_1_and_2(const L&) // Max experimental error (with arbitrary precision arithmetic) 9.516e-12 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos6 : public std::integral_constant +struct lanczos6 : public boost::math::integral_constant { // // Produces slightly better than float precision when evaluated at // double precision: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[6] = { + BOOST_MATH_STATIC const T num[6] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 8706.349592549009182288174442774377925882)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 8523.650341121874633477483696775067709735)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 3338.029219476423550899999750161289306564)), @@ -77,23 +81,23 @@ struct lanczos6 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 63.99951844938187085666201263218840287667)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 2.506628274631006311133031631822390264407)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint16_t) denom[6] = { - static_cast(0u), - static_cast(24u), - static_cast(50u), - static_cast(35u), - static_cast(10u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint16_t) denom[6] = { + static_cast(0u), + static_cast(24u), + static_cast(50u), + static_cast(35u), + static_cast(10u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[6] = { + BOOST_MATH_STATIC const T num[6] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 32.81244541029783471623665933780748627823)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 32.12388941444332003446077108933558534361)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 12.58034729455216106950851080138931470954)), @@ -101,13 +105,13 @@ struct lanczos6 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 0.2412010548258800231126240760264822486599)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 0.009446967704539249494420221613134244048319)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint16_t) denom[6] = { - static_cast(0u), - static_cast(24u), - static_cast(50u), - static_cast(35u), - static_cast(10u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint16_t) denom[6] = { + static_cast(0u), + static_cast(24u), + static_cast(50u), + static_cast(35u), + static_cast(10u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -115,10 +119,10 @@ struct lanczos6 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[5] = { + BOOST_MATH_STATIC const T d[5] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 2.044879010930422922760429926121241330235)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, -2.751366405578505366591317846728753993668)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 1.02282965224225004296750609604264824677)), @@ -135,10 +139,10 @@ struct lanczos6 : public 
std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[5] = { + BOOST_MATH_STATIC const T d[5] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 5.748142489536043490764289256167080091892)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, -7.734074268282457156081021756682138251825)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 35, 2.875167944990511006997713242805893543947)), @@ -155,7 +159,7 @@ struct lanczos6 : public std::integral_constant return result; } - static double g(){ return 5.581000000000000405009359383257105946541; } + BOOST_MATH_GPU_ENABLED static double g(){ return 5.581000000000000405009359383257105946541; } }; // @@ -163,17 +167,17 @@ struct lanczos6 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 2.16676e-19 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos11 : public std::integral_constant +struct lanczos11 : public boost::math::integral_constant { // // Produces slightly better than double precision when evaluated at // extended-double precision: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[11] = { + BOOST_MATH_STATIC const T num[11] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 38474670393.31776828316099004518914832218)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 36857665043.51950660081971227404959150474)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 15889202453.72942008945006665994637853242)), @@ -186,28 +190,28 @@ struct lanczos11 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 261.6140441641668190791708576058805625502)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 2.506628274631000502415573855452633787834)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[11] = { - static_cast(0u), - static_cast(362880u), - static_cast(1026576u), - static_cast(1172700u), - static_cast(723680u), - static_cast(269325u), - static_cast(63273u), - static_cast(9450u), - static_cast(870u), - static_cast(45u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[11] = { + static_cast(0u), + static_cast(362880u), + static_cast(1026576u), + static_cast(1172700u), + static_cast(723680u), + static_cast(269325u), + static_cast(63273u), + static_cast(9450u), + static_cast(870u), + static_cast(45u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[11] = { + BOOST_MATH_STATIC const T num[11] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 709811.662581657956893540610814842699825)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 679979.847415722640161734319823103390728)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 293136.785721159725251629480984140341656)), @@ -220,18 +224,18 @@ struct lanczos11 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 0.004826466289237661857584712046231435101741)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 0.4624429436045378766270459638520555557321e-4)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[11] = { - static_cast(0u), - static_cast(362880u), - static_cast(1026576u), - static_cast(1172700u), - 
static_cast(723680u), - static_cast(269325u), - static_cast(63273u), - static_cast(9450u), - static_cast(870u), - static_cast(45u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[11] = { + static_cast(0u), + static_cast(362880u), + static_cast(1026576u), + static_cast(1172700u), + static_cast(723680u), + static_cast(269325u), + static_cast(63273u), + static_cast(9450u), + static_cast(870u), + static_cast(45u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -239,10 +243,10 @@ struct lanczos11 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[10] = { + BOOST_MATH_STATIC const T d[10] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 4.005853070677940377969080796551266387954)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, -13.17044315127646469834125159673527183164)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 17.19146865350790353683895137079288129318)), @@ -264,10 +268,10 @@ struct lanczos11 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[10] = { + BOOST_MATH_STATIC const T d[10] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 19.05889633808148715159575716844556056056)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, -62.66183664701721716960978577959655644762)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 60, 81.7929198065004751699057192860287512027)), @@ -289,7 +293,7 @@ struct lanczos11 : public std::integral_constant return result; } - static double g(){ return 10.90051099999999983936049829935654997826; } + BOOST_MATH_GPU_ENABLED static double g(){ return 10.90051099999999983936049829935654997826; } }; // @@ -297,17 +301,17 @@ struct lanczos11 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 9.2213e-23 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos13 : public std::integral_constant +struct lanczos13 : public boost::math::integral_constant { // // Produces slightly better than extended-double precision when evaluated at // higher precision: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[13] = { + BOOST_MATH_STATIC const T num[13] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 44012138428004.60895436261759919070125699)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 41590453358593.20051581730723108131357995)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 18013842787117.99677796276038389462742949)), @@ -322,30 +326,30 @@ struct lanczos13 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 381.8801248632926870394389468349331394196)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 2.506628274631000502415763426076722427007)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = { - static_cast(0u), - static_cast(39916800u), - static_cast(120543840u), - static_cast(150917976u), - static_cast(105258076u), - static_cast(45995730u), - static_cast(13339535u), - static_cast(2637558u), - static_cast(357423u), - static_cast(32670u), - static_cast(1925u), - static_cast(66u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[13] = { + 
static_cast(0u), + static_cast(39916800u), + static_cast(120543840u), + static_cast(150917976u), + static_cast(105258076u), + static_cast(45995730u), + static_cast(13339535u), + static_cast(2637558u), + static_cast(357423u), + static_cast(32670u), + static_cast(1925u), + static_cast(66u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[13] = { + BOOST_MATH_STATIC const T num[13] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 86091529.53418537217994842267760536134841)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 81354505.17858011242874285785316135398567)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 35236626.38815461910817650960734605416521)), @@ -360,20 +364,20 @@ struct lanczos13 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 0.0007469903808915448316510079585999893674101)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 0.4903180573459871862552197089738373164184e-5)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = { - static_cast(0u), - static_cast(39916800u), - static_cast(120543840u), - static_cast(150917976u), - static_cast(105258076u), - static_cast(45995730u), - static_cast(13339535u), - static_cast(2637558u), - static_cast(357423u), - static_cast(32670u), - static_cast(1925u), - static_cast(66u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[13] = { + static_cast(0u), + static_cast(39916800u), + static_cast(120543840u), + static_cast(150917976u), + static_cast(105258076u), + static_cast(45995730u), + static_cast(13339535u), + static_cast(2637558u), + static_cast(357423u), + static_cast(32670u), + static_cast(1925u), + static_cast(66u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -381,10 +385,10 @@ struct lanczos13 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[12] = { + BOOST_MATH_STATIC const T d[12] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 4.832115561461656947793029596285626840312)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, -19.86441536140337740383120735104359034688)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 33.9927422807443239927197864963170585331)), @@ -408,10 +412,10 @@ struct lanczos13 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[12] = { + BOOST_MATH_STATIC const T d[12] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 26.96979819614830698367887026728396466395)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, -110.8705424709385114023884328797900204863)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 72, 189.7258846119231466417015694690434770085)), @@ -435,7 +439,7 @@ struct lanczos13 : public std::integral_constant return result; } - static double g(){ return 13.1445650000000000545696821063756942749; } + BOOST_MATH_GPU_ENABLED static double g(){ return 13.1445650000000000545696821063756942749; } }; // @@ -443,16 +447,16 @@ struct lanczos13 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 8.111667e-8 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 
at Mar 23 2006 // -struct lanczos6m24 : public std::integral_constant +struct lanczos6m24 : public boost::math::integral_constant { // // Use for float precision, when evaluated as a float: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[6] = { + BOOST_MATH_STATIC const T num[6] = { static_cast(58.52061591769095910314047740215847630266L), static_cast(182.5248962595894264831189414768236280862L), static_cast(211.0971093028510041839168287718170827259L), @@ -460,23 +464,23 @@ struct lanczos6m24 : public std::integral_constant static_cast(27.5192015197455403062503721613097825345L), static_cast(2.50662858515256974113978724717473206342L) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint16_t) denom[6] = { - static_cast(0u), - static_cast(24u), - static_cast(50u), - static_cast(35u), - static_cast(10u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint16_t) denom[6] = { + static_cast(0u), + static_cast(24u), + static_cast(50u), + static_cast(35u), + static_cast(10u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[6] = { + BOOST_MATH_STATIC const T num[6] = { static_cast(14.0261432874996476619570577285003839357L), static_cast(43.74732405540314316089531289293124360129L), static_cast(50.59547402616588964511581430025589038612L), @@ -484,13 +488,13 @@ struct lanczos6m24 : public std::integral_constant static_cast(6.595765571169314946316366571954421695196L), static_cast(0.6007854010515290065101128585795542383721L) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint16_t) denom[6] = { - static_cast(0u), - static_cast(24u), - static_cast(50u), - static_cast(35u), - static_cast(10u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint16_t) denom[6] = { + static_cast(0u), + static_cast(24u), + static_cast(50u), + static_cast(35u), + static_cast(10u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -498,10 +502,10 @@ struct lanczos6m24 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[5] = { + BOOST_MATH_STATIC const T d[5] = { static_cast(0.4922488055204602807654354732674868442106L), static_cast(0.004954497451132152436631238060933905650346L), static_cast(-0.003374784572167105840686977985330859371848L), @@ -518,10 +522,10 @@ struct lanczos6m24 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[5] = { + BOOST_MATH_STATIC const T d[5] = { static_cast(0.6534966888520080645505805298901130485464L), static_cast(0.006577461728560758362509168026049182707101L), static_cast(-0.004480276069269967207178373559014835978161L), @@ -538,7 +542,7 @@ struct lanczos6m24 : public std::integral_constant return result; } - static double g(){ return 1.428456135094165802001953125; } + BOOST_MATH_GPU_ENABLED static double g(){ return 1.428456135094165802001953125; } }; // @@ -546,16 +550,16 @@ struct lanczos6m24 : public std::integral_constant // Max experimental error (with arbitrary precision 
arithmetic) 1.196214e-17 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos13m53 : public std::integral_constant +struct lanczos13m53 : public boost::math::integral_constant { // // Use for double precision, when evaluated as a double: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[13] = { + BOOST_MATH_STATIC const T num[13] = { static_cast(23531376880.41075968857200767445163675473L), static_cast(42919803642.64909876895789904700198885093L), static_cast(35711959237.35566804944018545154716670596L), @@ -570,30 +574,30 @@ struct lanczos13m53 : public std::integral_constant static_cast(210.8242777515793458725097339207133627117L), static_cast(2.506628274631000270164908177133837338626L) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = { - static_cast(0u), - static_cast(39916800u), - static_cast(120543840u), - static_cast(150917976u), - static_cast(105258076u), - static_cast(45995730u), - static_cast(13339535u), - static_cast(2637558u), - static_cast(357423u), - static_cast(32670u), - static_cast(1925u), - static_cast(66u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[13] = { + static_cast(0u), + static_cast(39916800u), + static_cast(120543840u), + static_cast(150917976u), + static_cast(105258076u), + static_cast(45995730u), + static_cast(13339535u), + static_cast(2637558u), + static_cast(357423u), + static_cast(32670u), + static_cast(1925u), + static_cast(66u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[13] = { + BOOST_MATH_STATIC const T num[13] = { static_cast(56906521.91347156388090791033559122686859L), static_cast(103794043.1163445451906271053616070238554L), static_cast(86363131.28813859145546927288977868422342L), @@ -608,20 +612,20 @@ struct lanczos13m53 : public std::integral_constant static_cast(0.5098416655656676188125178644804694509993L), static_cast(0.006061842346248906525783753964555936883222L) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint32_t) denom[13] = { - static_cast(0u), - static_cast(39916800u), - static_cast(120543840u), - static_cast(150917976u), - static_cast(105258076u), - static_cast(45995730u), - static_cast(13339535u), - static_cast(2637558u), - static_cast(357423u), - static_cast(32670u), - static_cast(1925u), - static_cast(66u), - static_cast(1u) + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint32_t) denom[13] = { + static_cast(0u), + static_cast(39916800u), + static_cast(120543840u), + static_cast(150917976u), + static_cast(105258076u), + static_cast(45995730u), + static_cast(13339535u), + static_cast(2637558u), + static_cast(357423u), + static_cast(32670u), + static_cast(1925u), + static_cast(66u), + static_cast(1u) }; // LCOV_EXCL_STOP return boost::math::tools::evaluate_rational(num, denom, z); @@ -629,10 +633,10 @@ struct lanczos13m53 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[12] = { + BOOST_MATH_STATIC const T d[12] = { static_cast(2.208709979316623790862569924861841433016L), static_cast(-3.327150580651624233553677113928873034916L), 
static_cast(1.483082862367253753040442933770164111678L), @@ -656,10 +660,10 @@ struct lanczos13m53 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[12] = { + BOOST_MATH_STATIC const T d[12] = { static_cast(6.565936202082889535528455955485877361223L), static_cast(-9.8907772644920670589288081640128194231L), static_cast(4.408830289125943377923077727900630927902L), @@ -683,7 +687,7 @@ struct lanczos13m53 : public std::integral_constant return result; } - static double g(){ return 6.024680040776729583740234375; } + BOOST_MATH_GPU_ENABLED static double g(){ return 6.024680040776729583740234375; } }; // @@ -691,16 +695,16 @@ struct lanczos13m53 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 2.7699e-26 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos17m64 : public std::integral_constant +struct lanczos17m64 : public boost::math::integral_constant { // // Use for extended-double precision, when evaluated as an extended-double: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[17] = { + BOOST_MATH_STATIC const T num[17] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 553681095419291969.2230556393350368550504)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 731918863887667017.2511276782146694632234)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 453393234285807339.4627124634539085143364)), @@ -719,7 +723,7 @@ struct lanczos17m64 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 488.0063567520005730476791712814838113252)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2.50662827463100050241576877135758834683)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint64_t) denom[17] = { + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint64_t) denom[17] = { BOOST_MATH_INT_VALUE_SUFFIX(0, uLL), BOOST_MATH_INT_VALUE_SUFFIX(1307674368000, uLL), BOOST_MATH_INT_VALUE_SUFFIX(4339163001600, uLL), @@ -743,10 +747,10 @@ struct lanczos17m64 : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[17] = { + BOOST_MATH_STATIC const T num[17] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2715894658327.717377557655133124376674911)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 3590179526097.912105038525528721129550434)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 2223966599737.814969312127353235818710172)), @@ -765,7 +769,7 @@ struct lanczos17m64 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.002393749522058449186690627996063983095463)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 0.1229541408909435212800785616808830746135e-4)) }; - static const BOOST_MATH_INT_TABLE_TYPE(T, std::uint64_t) denom[17] = { + BOOST_MATH_STATIC const BOOST_MATH_INT_TABLE_TYPE(T, boost::math::uint64_t) denom[17] = { BOOST_MATH_INT_VALUE_SUFFIX(0, uLL), BOOST_MATH_INT_VALUE_SUFFIX(1307674368000, uLL), BOOST_MATH_INT_VALUE_SUFFIX(4339163001600, uLL), @@ -790,10 +794,10 @@ struct lanczos17m64 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[16] = { + 
BOOST_MATH_STATIC const T d[16] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 4.493645054286536365763334986866616581265)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -16.95716370392468543800733966378143997694)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 26.19196892983737527836811770970479846644)), @@ -821,10 +825,10 @@ struct lanczos17m64 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[16] = { + BOOST_MATH_STATIC const T d[16] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 23.56409085052261327114594781581930373708)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, -88.92116338946308797946237246006238652361)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 64, 137.3472822086847596961177383569603988797)), @@ -852,7 +856,7 @@ struct lanczos17m64 : public std::integral_constant return result; } - static double g(){ return 12.2252227365970611572265625; } + BOOST_MATH_GPU_ENABLED static double g(){ return 12.2252227365970611572265625; } }; // @@ -860,16 +864,16 @@ struct lanczos17m64 : public std::integral_constant // Max experimental error (with arbitrary precision arithmetic) 1.0541e-38 // Generated with compiler: Microsoft Visual C++ version 8.0 on Win32 at Mar 23 2006 // -struct lanczos24m113 : public std::integral_constant +struct lanczos24m113 : public boost::math::integral_constant { // // Use for long-double precision, when evaluated as an long-double: // template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[24] = { + BOOST_MATH_STATIC const T num[24] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 2029889364934367661624137213253.22102954656825019111612712252027267955023987678816620961507)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 2338599599286656537526273232565.2727349714338768161421882478417543004440597874814359063158)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 1288527989493833400335117708406.3953711906175960449186720680201425446299360322830739180195)), @@ -895,7 +899,7 @@ struct lanczos24m113 : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 1151.61895453463992438325318456328526085882924197763140514450975619271382783957699017875304)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 2.50662827463100050241576528481104515966515623051532908941425544355490413900497467936202516)) }; - static const T denom[24] = { + BOOST_MATH_STATIC const T denom[24] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.112400072777760768e22)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.414847677933545472e22)), @@ -926,10 +930,10 @@ struct lanczos24m113 : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[24] = { + BOOST_MATH_STATIC const T num[24] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 3035162425359883494754.02878223286972654682199012688209026810841953293372712802258398358538)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 3496756894406430103600.16057175075063458536101374170860226963245118484234495645518505519827)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 1926652656689320888654.01954015145958293168365236755537645929361841917596501251362171653478)), @@ -955,7 +959,7 @@ struct lanczos24m113 : public std::integral_constant 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.172194142179211139195966608011235161516824700287310869949928393345257114743230967204370963e-5)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.374799931707148855771381263542708435935402853962736029347951399323367765509988401336565436e-8)) }; - static const T denom[24] = { + BOOST_MATH_STATIC const T denom[24] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.0)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.112400072777760768e22)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 0.414847677933545472e22)), @@ -987,10 +991,10 @@ struct lanczos24m113 : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[23] = { + BOOST_MATH_STATIC const T d[23] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 7.4734083002469026177867421609938203388868806387315406134072298925733950040583068760685908)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, -50.4225805042247530267317342133388132970816607563062253708655085754357843064134941138154171)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 152.288200621747008570784082624444625293884063492396162110698238568311211546361189979357019)), @@ -1025,10 +1029,10 @@ struct lanczos24m113 : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[23] = { + BOOST_MATH_STATIC const T d[23] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 61.4165001061101455341808888883960361969557848005400286332291451422461117307237198559485365)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, -414.372973678657049667308134761613915623353625332248315105320470271523320700386200587519147)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 113, 1251.50505818554680171298972755376376836161706773644771875668053742215217922228357204561873)), @@ -1063,7 +1067,7 @@ struct lanczos24m113 : public std::integral_constant return result; } - static double g(){ return 20.3209821879863739013671875; } + BOOST_MATH_GPU_ENABLED static double g(){ return 20.3209821879863739013671875; } }; // @@ -1072,13 +1076,13 @@ struct lanczos24m113 : public std::integral_constant // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at May 23 2021 // Type precision was 134 bits or 42 max_digits10 // -struct lanczos27MP : public std::integral_constant +struct lanczos27MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[27] = { + BOOST_MATH_STATIC const T num[27] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 2.532923291341302819860952064783714673718970e+36)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 2.715272050979243637524956158081893927075092e+36)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 1.399396313336459710065708403038293278484916e+36)), @@ -1107,7 +1111,7 @@ struct lanczos27MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 1.580741273679785112052701460119954412080073e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 2.506628274631000502415765284811045253005320e+00)) }; - static const T denom[27] = { + BOOST_MATH_STATIC const T denom[27] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 0.000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 1.551121004333098598400000000000000000000000e+25)), 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 5.919012881170120359936000000000000000000000e+25)), @@ -1141,10 +1145,10 @@ struct lanczos27MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[27] = { + BOOST_MATH_STATIC const T num[27] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 4.630539114451826442425094380936505531231478e+25)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 4.963898228350662244301785145431331232866294e+25)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 2.558292778812387748738731408569861630189290e+25)), @@ -1173,7 +1177,7 @@ struct lanczos27MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 2.889816806780013044430000551700375309307825e-08)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 4.582468135039046226997146555551548992616343e-11)) }; - static const T denom[27] = { + BOOST_MATH_STATIC const T denom[27] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 0.000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 1.551121004333098598400000000000000000000000e+25)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 5.919012881170120359936000000000000000000000e+25)), @@ -1208,10 +1212,10 @@ struct lanczos27MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[34] = { + BOOST_MATH_STATIC const T d[34] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 6.264579889722939745225908247624593169040293e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, -3.470545597111704235784909052092266897169254e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 8.398164226943527197542310295220360303173237e+01)), @@ -1257,10 +1261,10 @@ struct lanczos27MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[34] = { + BOOST_MATH_STATIC const T d[34] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 4.391991857844535020743473289228849738381662e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, -2.433141291692735004291785549611375831426138e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 134, 5.887812040849956173864447000497922705559488e+02)), @@ -1306,10 +1310,10 @@ struct lanczos27MP : public std::integral_constant return result; } - static double g() { return 2.472513680905104038743047567550092935562134e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 2.472513680905104038743047567550092935562134e+01; } }; -inline double lanczos_g_near_1_and_2(const lanczos27MP&) +BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const lanczos27MP&) { return 17.03623256087303; } @@ -1320,13 +1324,13 @@ inline double lanczos_g_near_1_and_2(const lanczos27MP&) // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at Oct 14 2019 // Type precision was 168 bits or 53 max_digits10 // -struct lanczos35MP : public std::integral_constant +struct lanczos35MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[35] = { + BOOST_MATH_STATIC const T num[35] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 2.17215050716253100021302249837728942659410271586236104e+50)), 
static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 2.51055117651708470336913962553466820524801246971658127e+50)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 1.40813458996718289733677017073036013655624930344397267e+50)), @@ -1363,7 +1367,7 @@ struct lanczos35MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 2.50897418653428667959996348205296461689142907811767371e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 2.50662827463100050241576528481104525300698674060984055e+00)) }; - static const T denom[35] = { + BOOST_MATH_STATIC const T denom[35] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 0.00000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 8.68331761881188649551819440128000000000000000000000000e+36)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 3.55043336733310191803732770947072000000000000000000000e+37)), @@ -1405,10 +1409,10 @@ struct lanczos35MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[35] = { + BOOST_MATH_STATIC const T num[35] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 2.84421398435712762388902267099927585742388886580864424e+37)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 3.28731583799033736725852757551292030085556435695468295e+37)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 1.84381150359300352571680869181416248982215282642834936e+37)), @@ -1445,7 +1449,7 @@ struct lanczos35MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 3.28525092722679899458094768960179796663588010298597603e-10)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 3.28217919006153582429216342066702743329957749672852350e-13)) }; - static const T denom[35] = { + BOOST_MATH_STATIC const T denom[35] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 0.00000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 8.68331761881188649551819440128000000000000000000000000e+36)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 3.55043336733310191803732770947072000000000000000000000e+37)), @@ -1488,10 +1492,10 @@ struct lanczos35MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[42] = { + BOOST_MATH_STATIC const T d[42] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 8.2258008829795701933757823508857131818190413131511363e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, -6.1680809698202901664719598422224259984110345848176138e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 2.0937956909159916126016144892534179459545368045658870e+02)), @@ -1545,10 +1549,10 @@ struct lanczos35MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[42] = { + BOOST_MATH_STATIC const T d[42] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 7.3782193657165970743894979068466124765194827248379940e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, -5.5325256602067816772285455933211570612342576586214891e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 168, 1.8780522570799869937961476290263461833002660531646012e+03)), @@ -1602,10 +1606,10 @@ struct lanczos35MP : public std::integral_constant return result; } - static double g() { return 
2.96640371531248092651367187500000000000000000000000000e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 2.96640371531248092651367187500000000000000000000000000e+01; } }; -inline double lanczos_g_near_1_and_2(const lanczos35MP&) +BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const lanczos35MP&) { return 22.36563469469547; } @@ -1615,13 +1619,13 @@ inline double lanczos_g_near_1_and_2(const lanczos35MP&) // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at Oct 14 2019 // Type precision was 201 bits or 63 max_digits10 // -struct lanczos48MP : public std::integral_constant +struct lanczos48MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[48] = { + BOOST_MATH_STATIC const T num[48] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 5.761757987425932419978923296640371540367427757167447418730589877e+70)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 8.723233313564421930629677035555276136256253817229396631458438691e+70)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 6.460052620548943146316510839385235752729444155384745952604400014e+70)), @@ -1671,7 +1675,7 @@ struct lanczos48MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 3.749690888961891063146468955091435916957208840312184463551812828e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 2.506628274631000502415765284811045253006986740609938316629929233e+00)) }; - static const T denom[48] = { + BOOST_MATH_STATIC const T denom[48] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 0.000000000000000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 5.502622159812088949850305428800254892961651752960000000000000000e+57)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 2.430336111272256671478593169569751383305061494947840000000000000e+58)), @@ -1726,10 +1730,10 @@ struct lanczos48MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[48] = { + BOOST_MATH_STATIC const T num[48] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 1.775732062655417998910881298714821053061055705608286949609421120e+58)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 2.688437299644448784121592662352787426980194425446481703306505899e+58)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 1.990941408817264621124181941423397180231807676408175000011574647e+58)), @@ -1779,7 +1783,7 @@ struct lanczos48MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 1.155627562127299657410444702080985966726894475302009989071093439e-09)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 7.725246714864934496649491688787278190129598018071339049048385845e-13)) }; - static const T denom[48] = { + BOOST_MATH_STATIC const T denom[48] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 0.000000000000000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 5.502622159812088949850305428800254892961651752960000000000000000e+57)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 2.430336111272256671478593169569751383305061494947840000000000000e+58)), @@ -1835,10 +1839,10 @@ struct lanczos48MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - 
static const T d[47] = { + BOOST_MATH_STATIC const T d[47] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 1.059629332377126683204423480567078764834299559082175332563440691e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, -1.045539783916612448318159279915745234781500064405838259582295756e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 4.784116147862702971548198855631720823614071322755242269800139953e+02)), @@ -1897,10 +1901,10 @@ struct lanczos48MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[47] = { + BOOST_MATH_STATIC const T d[47] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 1.201442621036266842137537764128372139686555918574926377003612763e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, -1.185467427150643969519910927764836582205108528009141221591420898e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 201, 5.424388386017623557963301151646679462091516489317860889362683594e+03)), @@ -1959,7 +1963,7 @@ struct lanczos48MP : public std::integral_constant return result; } - static double g() { return 2.880805098265409469604492187500000000000000000000000000000000000e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 2.880805098265409469604492187500000000000000000000000000000000000e+01; } }; // // Lanczos Coefficients for N=49 G=3.531905273437499914734871708787977695465087890625000000000000000000000000e+01 @@ -1967,13 +1971,13 @@ struct lanczos48MP : public std::integral_constant // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at May 23 2021 // Type precision was 234 bits or 72 max_digits10 // -struct lanczos49MP : public std::integral_constant +struct lanczos49MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[49] = { + BOOST_MATH_STATIC const T num[49] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 2.019754080776483553135944314398390557182640085494778723336498544843678485e+75)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 2.676059842235360762770131859925648183945167646928679564649946220888559950e+75)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.735650057396761011129552305882284776566019938011364428733911563803428382e+75)), @@ -2024,7 +2028,7 @@ struct lanczos49MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 4.390800780998954208500039666019609185743083611214630479125238184115750385e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 2.506628274631000502415765284811045253006986740609938316629923576327386304e+00)) }; - static const T denom[49] = { + BOOST_MATH_STATIC const T denom[49] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 0.000000000000000000000000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 2.586232415111681806429643551536119799691976323891200000000000000000000000e+59)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.147760594457772724544789095126583405046340554378444800000000000000000000e+60)), @@ -2080,10 +2084,10 @@ struct lanczos49MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[49] = { + BOOST_MATH_STATIC const T num[49] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 
9.256115936295239128792053510340342045264892843178101822334871337037830072e+59)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.226382973449509462464247401218271019985727521806127065773488938845990367e+60)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 7.954125855720840120393676022050001333138789037332565663424594891457273557e+59)), @@ -2134,7 +2138,7 @@ struct lanczos49MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 2.012213341659767638341287600182102653785253052492980766472349845276996656e-12)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.148735984247176123115370642724455566337349193609892794757225210307646070e-15)) }; - static const T denom[49] = { + BOOST_MATH_STATIC const T denom[49] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 0.000000000000000000000000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 2.586232415111681806429643551536119799691976323891200000000000000000000000e+59)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.147760594457772724544789095126583405046340554378444800000000000000000000e+60)), @@ -2191,10 +2195,10 @@ struct lanczos49MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[48] = { + BOOST_MATH_STATIC const T d[48] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.233965513689195496302526816415068018137532804347903252026160914018410959e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, -1.432567696701419045483804034990696504881298696037704685583731202573594084e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 7.800990151010204780591569831451389602736047219596430673280355834870101274e+02)), @@ -2254,10 +2258,10 @@ struct lanczos49MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz) { // LCOV_EXCL_START - static const T d[48] = { + BOOST_MATH_STATIC const T d[48] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.614127734928823683399031924928203896697519780457812139739363243361356121e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, -1.873915620620241270111954934939697069495813017577862172724257417200307532e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 234, 1.020433263568799913803105156119729477192007677199414299858195073560627451e+04)), @@ -2317,10 +2321,10 @@ struct lanczos49MP : public std::integral_constant return result; } - static double g() { return 3.531905273437499914734871708787977695465087890625000000000000000000000000e+01; } + BOOST_MATH_GPU_ENABLED static double g() { return 3.531905273437499914734871708787977695465087890625000000000000000000000000e+01; } }; -inline double lanczos_g_near_1_and_2(const lanczos49MP&) +BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const lanczos49MP&) { return 33.54638671875000; } @@ -2331,13 +2335,13 @@ inline double lanczos_g_near_1_and_2(const lanczos49MP&) // Generated with compiler: Microsoft Visual C++ version 14.2 on Win32 at May 22 2021 // Type precision was 267 bits or 82 max_digits10 // -struct lanczos52MP : public std::integral_constant +struct lanczos52MP : public boost::math::integral_constant { template - static T lanczos_sum(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum(const T& z) { // LCOV_EXCL_START - static const T num[52] = { + BOOST_MATH_STATIC const T num[52] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 
6.2155666558597192337239536765115831322604714024167432764126799013946738944179064162e+86)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 6.4127424062560995063147129656553600039438028633959646865531341376543275935920940510e+86)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 3.2432219642804430367752303997394644425738553439619047355470691880100895245432999409e+86)), @@ -2391,7 +2395,7 @@ struct lanczos52MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 6.3192906485096381210566149918556620595525679738152760526187454875638091923687554946e+03)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 2.5066282746310005024157652848110452530069867406099383166299235763422936546004304390e+00)) }; - static const T denom[52] = { + BOOST_MATH_STATIC const T denom[52] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 3.0414093201713378043612608166064768844377641568960512000000000000000000000000000000e+64)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.3683925049359750564345782687270252191318781054337155072000000000000000000000000000e+65)), @@ -2450,10 +2454,10 @@ struct lanczos52MP : public std::integral_constant } template - static T lanczos_sum_expG_scaled(const T& z) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_expG_scaled(const T& z) { // LCOV_EXCL_START - static const T num[52] = { + BOOST_MATH_STATIC const T num[52] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.2968364952374867351881152115042817894191583875220489481700563388077315440993668645e+65)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.3379758994539627857606593702434364057385206718035611620158459666404856221820703129e+65)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 6.7667661507089657936560642518188013126674666141084536651063996312630940638352438169e+64)), @@ -2507,7 +2511,7 @@ struct lanczos52MP : public std::integral_constant static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.3184778139696006596104645792244972612333458493576785210966728195969324996631733257e-18)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 5.2299125832253333486600023635817464870204660970908989075481425992405717273229096642e-22)) }; - static const T denom[52] = { + BOOST_MATH_STATIC const T denom[52] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000e+00)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 3.0414093201713378043612608166064768844377641568960512000000000000000000000000000000e+64)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.3683925049359750564345782687270252191318781054337155072000000000000000000000000000e+65)), @@ -2567,10 +2571,10 @@ struct lanczos52MP : public std::integral_constant template - static T lanczos_sum_near_1(const T& dz) + BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_1(const T& dz) { // LCOV_EXCL_START - static const T d[56] = { + BOOST_MATH_STATIC const T d[56] = { static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.4249481633301349696310814410227012806541100102720500928500445853537331413655453290e+01)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, -1.9263209672927829270913652941762375058727326960303110137656951784697992824730035351e+02)), static_cast(BOOST_MATH_BIG_CONSTANT(T, 267, 1.2326134462101140657073655882621393643823409472993225649429843685598155061860815843e+03)), @@ -2638,10 +2642,10 @@ struct lanczos52MP : public std::integral_constant } template - static T lanczos_sum_near_2(const T& dz) + 
+   BOOST_MATH_GPU_ENABLED static T lanczos_sum_near_2(const T& dz)
    {
       // LCOV_EXCL_START
-      static const T d[56] = {
+      BOOST_MATH_STATIC const T d[56] = {
         static_cast<T>(BOOST_MATH_BIG_CONSTANT(T, 267, 2.1359871474796665853092357455924330354587340093067807143261699873815704783987359772e+02)),
         static_cast<T>(BOOST_MATH_BIG_CONSTANT(T, 267, -2.8875414095359657817766255009397774415784763914903057809977502598124862632510767554e+03)),
         static_cast<T>(BOOST_MATH_BIG_CONSTANT(T, 267, 1.8476787764422274017528261804071971508619123082396685980448133660376964287516316704e+04)),
@@ -2709,10 +2713,10 @@ struct lanczos52MP : public std::integral_constant
       return result;
    }

-   static double g() { return 4.9921416015624998863131622783839702606201171875000000000000000000000000000000000000e+01; }
+   BOOST_MATH_GPU_ENABLED static double g() { return 4.9921416015624998863131622783839702606201171875000000000000000000000000000000000000e+01; }
 };

-inline double lanczos_g_near_1_and_2(const lanczos52MP&)
+BOOST_MATH_GPU_ENABLED inline double lanczos_g_near_1_and_2(const lanczos52MP&)
 {
    return 38.73733398437500;
 }
@@ -2721,24 +2725,24 @@ inline double lanczos_g_near_1_and_2(const lanczos52MP&)
 //
 // placeholder for no lanczos info available:
 //
-struct undefined_lanczos : public std::integral_constant<int, (std::numeric_limits<int>::max)() - 1> { };
+struct undefined_lanczos : public boost::math::integral_constant<int, (boost::math::numeric_limits<int>::max)() - 1> { };

 template <class Real, class Policy>
 struct lanczos
 {
-   static constexpr auto target_precision = policies::precision<Real, Policy>::type::value <= 0 ? (std::numeric_limits<int>::max)()-2 :
+   BOOST_MATH_STATIC constexpr auto target_precision = policies::precision<Real, Policy>::type::value <= 0 ? (boost::math::numeric_limits<int>::max)()-2 :
                                             policies::precision<Real, Policy>::type::value;

-   using type = typename std::conditional<(target_precision <= lanczos6m24::value), lanczos6m24,
-      typename std::conditional<(target_precision <= lanczos13m53::value), lanczos13m53,
-      typename std::conditional<(target_precision <= lanczos11::value), lanczos11,
-      typename std::conditional<(target_precision <= lanczos17m64::value), lanczos17m64,
-      typename std::conditional<(target_precision <= lanczos24m113::value), lanczos24m113,
-      typename std::conditional<(target_precision <= lanczos27MP::value), lanczos27MP,
-      typename std::conditional<(target_precision <= lanczos35MP::value), lanczos35MP,
-      typename std::conditional<(target_precision <= lanczos48MP::value), lanczos48MP,
-      typename std::conditional<(target_precision <= lanczos49MP::value), lanczos49MP,
-      typename std::conditional<(target_precision <= lanczos52MP::value), lanczos52MP, undefined_lanczos>::type
+   using type = typename boost::math::conditional<(target_precision <= lanczos6m24::value), lanczos6m24,
+      typename boost::math::conditional<(target_precision <= lanczos13m53::value), lanczos13m53,
+      typename boost::math::conditional<(target_precision <= lanczos11::value), lanczos11,
+      typename boost::math::conditional<(target_precision <= lanczos17m64::value), lanczos17m64,
+      typename boost::math::conditional<(target_precision <= lanczos24m113::value), lanczos24m113,
+      typename boost::math::conditional<(target_precision <= lanczos27MP::value), lanczos27MP,
+      typename boost::math::conditional<(target_precision <= lanczos35MP::value), lanczos35MP,
+      typename boost::math::conditional<(target_precision <= lanczos48MP::value), lanczos48MP,
+      typename boost::math::conditional<(target_precision <= lanczos49MP::value), lanczos49MP,
+      typename boost::math::conditional<(target_precision <= lanczos52MP::value), lanczos52MP, undefined_lanczos>::type
    >::type>::type>::type>::type>::type>::type>::type>::type
    >::type;
 };
@@ -2748,7 +2752,7 @@ struct lanczos
 } // namespace boost

 #if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
-#if ((defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)) && !defined(_MANAGED)
+#if ((defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)) && !defined(_MANAGED) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
 #include <emmintrin.h>
 #endif
 #endif
diff --git a/include/boost/math/special_functions/log1p.hpp b/include/boost/math/special_functions/log1p.hpp
index 9b8a8e0eb7..758f606687 100644
--- a/include/boost/math/special_functions/log1p.hpp
+++ b/include/boost/math/special_functions/log1p.hpp
@@ -12,13 +12,14 @@
 #pragma warning(disable:4702) // Unreachable code (release mode only warning)
 #endif

-#include
-#include
-#include
 #include
 #include
 #include
 #include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -47,16 +48,16 @@ namespace detail
    typedef T result_type;

-   log1p_series(T x)
+   BOOST_MATH_GPU_ENABLED log1p_series(T x)
       : k(0), m_mult(-x), m_prod(-1){}

-   T operator()()
+   BOOST_MATH_GPU_ENABLED T operator()()
    {
       m_prod *= m_mult;
       return m_prod / ++k;
    }

-   int count()const
+   BOOST_MATH_GPU_ENABLED int count()const
    {
       return k;
    }
@@ -79,12 +80,12 @@ namespace detail
 // it performs no better than log(1+x): which is to say not very well at all.
 //
 template <class T, class Policy>
-T log1p_imp(T const & x, const Policy& pol, const std::integral_constant<int, 0>&)
+BOOST_MATH_GPU_ENABLED T log1p_imp(T const & x, const Policy& pol, const boost::math::integral_constant<int, 0>&)
 { // The function returns the natural logarithm of 1 + x.
    typedef typename tools::promote_args<T>::type result_type;
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::log1p<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1p<%1%>(%1%)";

    if((x < -1) || (boost::math::isnan)(x))
       return policies::raise_domain_error<T>(
@@ -101,7 +102,7 @@
    if(a < tools::epsilon<result_type>())
       return x;
    detail::log1p_series<result_type> s(x);
-   std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+   boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();

    result_type result = tools::sum_series(s, policies::get_epsilon<result_type, Policy>(), max_iter);

@@ -110,11 +111,11 @@
 }
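[Reviewer's aside, not part of the patch: log1p_series above generates the terms of log(1+x) = x - x^2/2 + x^3/3 - ..., and tools::sum_series adds terms until one drops below the requested epsilon. A minimal standalone sketch of that loop in plain C++ (a hypothetical helper, not the library's actual summation code, which also enforces the max-iteration policy shown in the hunk above):]

#include <cmath>
#include <cstdint>

// Illustration only: sums log(1+x) = x - x^2/2 + x^3/3 - ... for |x| < 1.
double log1p_by_series(double x, double eps = 1e-16, std::uintmax_t max_iter = 1000)
{
   double mult = -x, prod = -1.0, sum = 0.0;
   for (std::uintmax_t k = 1; k <= max_iter; ++k)
   {
      prod *= mult;                               // prod is now -(-x)^k
      double term = prod / static_cast<double>(k); // k-th series term
      sum += term;
      if (std::fabs(term) < eps * std::fabs(sum))
         break;                                   // same stopping idea as tools::sum_series
   }
   return sum;
}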
 template <class T, class Policy>
-T log1p_imp(T const& x, const Policy& pol, const std::integral_constant<int, 53>&)
+BOOST_MATH_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const boost::math::integral_constant<int, 53>&)
 { // The function returns the natural logarithm of 1 + x.
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::log1p<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1p<%1%>(%1%)";

    if(x < -1)
       return policies::raise_domain_error<T>(
@@ -135,7 +136,7 @@ T log1p_imp(T const& x, const Policy& pol, const std::integral_constant
    // Expected Error Term:                         1.843e-017
    // Maximum Relative Change in Control Points:   8.138e-004
    // Max Error found at double precision =        3.250766e-016
-   static const T P[] = {
+   BOOST_MATH_STATIC const T P[] = {
       static_cast<T>(0.15141069795941984e-16L),
       static_cast<T>(0.35495104378055055e-15L),
       static_cast<T>(0.33333333333332835L),
@@ -145,7 +146,7 @@ T log1p_imp(T const& x, const Policy& pol, const std::integral_constant
       static_cast<T>(0.13703234928513215L),
       static_cast<T>(0.011294864812099712L)
    };
-   static const T Q[] = {
+   BOOST_MATH_STATIC const T Q[] = {
       static_cast<T>(1L),
       static_cast<T>(3.7274719063011499L),
       static_cast<T>(5.5387948649720334L),
@@ -163,11 +164,11 @@ T log1p_imp(T const& x, const Policy& pol, const std::integral_constant
 }

 template <class T, class Policy>
-T log1p_imp(T const& x, const Policy& pol, const std::integral_constant<int, 64>&)
+BOOST_MATH_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const boost::math::integral_constant<int, 64>&)
 { // The function returns the natural logarithm of 1 + x.
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::log1p<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1p<%1%>(%1%)";

    if(x < -1)
       return policies::raise_domain_error<T>(
@@ -188,7 +189,7 @@ T log1p_imp(T const& x, const Policy& pol, const std::integral_constant
    // Expected Error Term:                         8.088e-20
    // Maximum Relative Change in Control Points:   9.648e-05
    // Max Error found at long double precision =   2.242324e-19
-   static const T P[] = {
+   BOOST_MATH_STATIC const T P[] = {
      BOOST_MATH_BIG_CONSTANT(T, 64, -0.807533446680736736712e-19),
      BOOST_MATH_BIG_CONSTANT(T, 64, -0.490881544804798926426e-18),
      BOOST_MATH_BIG_CONSTANT(T, 64, 0.333333333333333373941),
@@ -199,7 +200,7 @@ T log1p_imp(T const& x, const Policy& pol, const std::integral_constant
      BOOST_MATH_BIG_CONSTANT(T, 64, 0.0706537026422828914622),
      BOOST_MATH_BIG_CONSTANT(T, 64, 0.00441709903782239229447)
    };
-   static const T Q[] = {
+   BOOST_MATH_STATIC const T Q[] = {
      BOOST_MATH_BIG_CONSTANT(T, 64, 1.0),
      BOOST_MATH_BIG_CONSTANT(T, 64, 4.26423872346263928361),
      BOOST_MATH_BIG_CONSTANT(T, 64, 7.48189472704477708962),
@@ -218,11 +219,11 @@ T log1p_imp(T const& x, const Policy& pol, const std::integral_constant
 }

 template <class T, class Policy>
-T log1p_imp(T const& x, const Policy& pol, const std::integral_constant<int, 24>&)
+BOOST_MATH_GPU_ENABLED T log1p_imp(T const& x, const Policy& pol, const boost::math::integral_constant<int, 24>&)
 { // The function returns the natural logarithm of 1 + x.
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::log1p<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1p<%1%>(%1%)";

    if(x < -1)
       return policies::raise_domain_error<T>(
@@ -244,13 +245,13 @@
    // Maximum Relative Change in Control Points:   2.509e-04
    // Max Error found at double precision =        6.910422e-08
    // Max Error found at float precision =         8.357242e-08
-   static const T P[] = {
+   BOOST_MATH_STATIC const T P[] = {
      -0.671192866803148236519e-7L,
      0.119670999140731844725e-6L,
      0.333339469182083148598L,
      0.237827183019664122066L
    };
-   static const T Q[] = {
+   BOOST_MATH_STATIC const T Q[] = {
      1L,
      1.46348272586988539733L,
      0.497859871350117338894L,
@@ -268,22 +269,24 @@ struct log1p_initializer
 {
    struct init
    {
-      init()
+      BOOST_MATH_GPU_ENABLED init()
      {
         do_init(tag());
      }
      template <int N>
-      static void do_init(const std::integral_constant<int, N>&){}
-      static void do_init(const std::integral_constant<int, 64>&)
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, N>&){}
+      BOOST_MATH_GPU_ENABLED static void do_init(const boost::math::integral_constant<int, 64>&)
      {
         boost::math::log1p(static_cast<T>(0.25), Policy());
      }
-      void force_instantiate()const{}
+      BOOST_MATH_GPU_ENABLED void force_instantiate()const{}
    };
-   static const init initializer;
-   static void force_instantiate()
+   BOOST_MATH_STATIC const init initializer;
+   BOOST_MATH_GPU_ENABLED static void force_instantiate()
    {
+      #ifndef BOOST_MATH_HAS_GPU_SUPPORT
      initializer.force_instantiate();
+      #endif
    }
 };

@@ -294,7 +297,7 @@ const typename log1p_initializer::init log1p_initializer

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type log1p(T x, const Policy&)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type log1p(T x, const Policy&)
 {
    typedef typename tools::promote_args<T>::type result_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
@@ -306,7 +309,7 @@ inline typename tools::promote_args::type log1p(T x, const Policy&)
       policies::discrete_quantile<>,
       policies::assert_undefined<> >::type forwarding_policy;

-   typedef std::integral_constant<int,
 ...
@@ ... @@ inline typename tools::promote_args::type log1p(T x, const Policy&)

 #if defined(BOOST_HAS_LOG1P) && !(defined(__osf__) && defined(__DECCXX_VER))
 #  ifdef BOOST_MATH_USE_C99
 template <class Policy>
-inline float log1p(float x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline float log1p(float x, const Policy& pol)
 {
    if(x < -1)
       return policies::raise_domain_error<float>(
@@ -340,7 +343,7 @@ inline float log1p(float x, const Policy& pol)
 }
 #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
 template <class Policy>
-inline long double log1p(long double x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long double log1p(long double x, const Policy& pol)
 {
    if(x < -1)
       return policies::raise_domain_error<long double>(
@@ -365,7 +368,7 @@ inline float log1p(float x, const Policy& pol)
 }
 #endif
 template <class Policy>
-inline double log1p(double x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline double log1p(double x, const Policy& pol)
 {
    if(x < -1)
       return policies::raise_domain_error<double>(
@@ -425,7 +428,7 @@ inline long double log1p(long double x, const Policy& pol)
 #endif

 template <class T>
-inline typename tools::promote_args<T>::type log1p(T x)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type log1p(T x)
 {
    return boost::math::log1p(x, policies::policy<>());
 }
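[Reviewer's aside, not part of the patch: a short host-side usage sketch of the public overloads touched above, assuming a standard Boost.Math installation. It shows why log1p exists at all: for tiny x, 1 + x rounds to 1 and the naive formula loses everything.]

#include <boost/math/special_functions/log1p.hpp>
#include <boost/math/policies/policy.hpp>
#include <cmath>
#include <iostream>

int main()
{
   double x = 1e-17;
   std::cout << std::log(1.0 + x) << '\n';      // 1 + x rounds to 1, prints 0
   std::cout << boost::math::log1p(x) << '\n';  // full precision: ~1e-17

   // The Policy overload is the one BOOST_MATH_GPU_ENABLED now also exposes to device code:
   std::cout << boost::math::log1p(x, boost::math::policies::policy<>()) << '\n';
}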
@@ -433,12 +436,12 @@ inline typename tools::promote_args::type log1p(T x)
 //
 // Compute log(1+x)-x:
 //
 template <class T, class Policy>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    log1pmx(T x, const Policy& pol)
 {
    typedef typename tools::promote_args<T>::type result_type;
    BOOST_MATH_STD_USING
-   static const char* function = "boost::math::log1pmx<%1%>(%1%)";
+   constexpr auto function = "boost::math::log1pmx<%1%>(%1%)";

    if(x < -1)
       return policies::raise_domain_error<T>(
@@ -456,7 +459,7 @@ inline typename tools::promote_args::type
       return -x * x / 2;
    boost::math::detail::log1p_series<T> s(x);
    s();
-   std::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();
+   boost::math::uintmax_t max_iter = policies::get_max_series_iterations<Policy>();

    T result = boost::math::tools::sum_series(s, policies::get_epsilon<T, Policy>(), max_iter);

@@ -465,7 +468,7 @@ inline typename tools::promote_args::type
 }

 template <class T>
-inline typename tools::promote_args<T>::type log1pmx(T x)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type log1pmx(T x)
 {
    return log1pmx(x, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/math_fwd.hpp b/include/boost/math/special_functions/math_fwd.hpp
index 3e5d6a7625..91dde74ccc 100644
--- a/include/boost/math/special_functions/math_fwd.hpp
+++ b/include/boost/math/special_functions/math_fwd.hpp
@@ -4,6 +4,7 @@
 // Copyright Paul A. Bristow 2006.
 // Copyright John Maddock 2006.
+// Copyright Matt Borland 2024

 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
@@ -23,11 +24,91 @@
 #pragma once
 #endif

+#include
+#include <boost/math/tools/promotion.hpp> // for argument promotion.
+#include
+#include
+#include
+
+#ifdef BOOST_MATH_HAS_NVRTC
+
+namespace boost {
+namespace math {
+
+template <class RT1, class RT2, class A>
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<RT1, RT2, A>::type
+   beta(RT1 a, RT2 b, A arg);
+
+namespace detail{
+
+   template <class T1, class T2, class T3>
+   struct ellint_3_result
+   {
+      using type = typename boost::math::conditional<
+         policies::is_policy<T3>::value,
+         tools::promote_args_t<T1, T2>,
+         tools::promote_args_t<T1, T2, T3>
+      >::type;
+   };
+
+   template <class T, class U>
+   struct expint_result
+   {
+      using type = typename boost::math::conditional<
+         policies::is_policy<U>::value,
+         tools::promote_args_t<T>,
+         typename tools::promote_args<U>::type
+      >::type;
+   };
+
+   typedef boost::math::integral_constant<int, 0> bessel_no_int_tag;    // No integer optimisation possible.
+   typedef boost::math::integral_constant<int, 1> bessel_maybe_int_tag; // Maybe integer optimisation.
+   typedef boost::math::integral_constant<int, 2> bessel_int_tag;       // Definite integer optimisation.
+
+   template <class T1, class T2, class Policy>
+   struct bessel_traits
+   {
+      using result_type = typename boost::math::conditional<
+         boost::math::is_integral<T1>::value,
+         typename tools::promote_args<T2>::type,
+         tools::promote_args_t<T1, T2>
+      >::type;
+
+      typedef typename policies::precision<result_type, Policy>::type precision_type;
+
+      using optimisation_tag = typename boost::math::conditional<
+         (precision_type::value <= 0 || precision_type::value > 64),
+         bessel_no_int_tag,
+         typename boost::math::conditional<
+            boost::math::is_integral<T1>::value,
+            bessel_int_tag,
+            bessel_maybe_int_tag
+         >::type
+      >::type;
+
+      using optimisation_tag128 = typename boost::math::conditional<
+         (precision_type::value <= 0 || precision_type::value > 113),
+         bessel_no_int_tag,
+         typename boost::math::conditional<
+            boost::math::is_integral<T1>::value,
+            bessel_int_tag,
+            bessel_maybe_int_tag
+         >::type
+      >::type;
+   };
+
+} // namespace detail
+
+} // namespace math
+} // namespace boost
+
+#else
+
 #include
 #include
 #include
 #include
-#include <boost/math/tools/promotion.hpp> // for argument promotion.
+#include
 #include

 #define BOOST_NO_MACRO_EXPAND /**/
@@ -39,139 +120,139 @@ namespace boost

   // Beta functions.
   template <class RT1, class RT2>
-   tools::promote_args_t<RT1, RT2>
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<RT1, RT2>
         beta(RT1 a, RT2 b); // Beta function (2 arguments).
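[Reviewer's aside, not part of the patch: to make the tag dispatch in bessel_traits above concrete, here is a compile-time sketch of the same selection rule with simplified names. This is an illustration, not the library's API; an integral order type selects the "int" optimisation tag only while the target precision fits in 64 bits.]

#include <type_traits>

template <class T, int Precision>
using optimisation_tag = typename std::conditional<
   (Precision <= 0 || Precision > 64),
   std::integral_constant<int, 0>,               // bessel_no_int_tag
   typename std::conditional<
      std::is_integral<T>::value,
      std::integral_constant<int, 2>,            // bessel_int_tag
      std::integral_constant<int, 1>             // bessel_maybe_int_tag
   >::type
>::type;

static_assert(optimisation_tag<int, 53>::value == 2,     "integral order -> int tag");
static_assert(optimisation_tag<double, 53>::value == 1,  "real order -> maybe-int tag");
static_assert(optimisation_tag<double, 113>::value == 0, "too precise -> no-int tag");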
template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t beta(RT1 a, RT2 b, A x); // Beta function (3 arguments). template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t beta(RT1 a, RT2 b, RT3 x, const Policy& pol); // Beta function (3 arguments). template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t betac(RT1 a, RT2 b, RT3 x); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t betac(RT1 a, RT2 b, RT3 x, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta(RT1 a, RT2 b, RT3 x); // Incomplete beta function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta(RT1 a, RT2 b, RT3 x, const Policy& pol); // Incomplete beta function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac(RT1 a, RT2 b, RT3 x); // Incomplete beta complement function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac(RT1 a, RT2 b, RT3 x, const Policy& pol); // Incomplete beta complement function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_inv(T1 a, T2 b, T3 p, T4* py); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_inv(T1 a, T2 b, T3 p, T4* py, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_inv(RT1 a, RT2 b, RT3 p); // Incomplete beta inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_inv(RT1 a, RT2 b, RT3 p, const Policy&); // Incomplete beta inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_inva(RT1 a, RT2 b, RT3 p); // Incomplete beta inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_inva(RT1 a, RT2 b, RT3 p, const Policy&); // Incomplete beta inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_invb(RT1 a, RT2 b, RT3 p); // Incomplete beta inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_invb(RT1 a, RT2 b, RT3 p, const Policy&); // Incomplete beta inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_inv(T1 a, T2 b, T3 q, T4* py); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_inv(T1 a, T2 b, T3 q, T4* py, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_inv(RT1 a, RT2 b, RT3 q); // Incomplete beta complement inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_inv(RT1 a, RT2 b, RT3 q, const Policy&); // Incomplete beta complement inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_inva(RT1 a, RT2 b, RT3 q); // Incomplete beta complement inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_inva(RT1 a, RT2 b, RT3 q, const Policy&); // Incomplete beta complement inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_invb(RT1 a, RT2 b, RT3 q); // Incomplete beta complement inverse function. 
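[Reviewer's aside, not part of the patch: the incomplete-beta family above is forward-declared in both a policy and a non-policy form. A quick host-side round trip shows how the function/inverse pairs are meant to be used (hedged example; assumes Boost.Math is installed):]

#include <boost/math/special_functions/beta.hpp>
#include <iostream>

int main()
{
   double a = 2.0, b = 3.0, x = 0.4;
   double p = boost::math::ibeta(a, b, x);     // regularised incomplete beta I_x(a, b)
   double y = boost::math::ibeta_inv(a, b, p); // inverse recovers x
   std::cout << p << ' ' << y << '\n';         // y ~ 0.4
}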
template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibetac_invb(RT1 a, RT2 b, RT3 q, const Policy&); // Incomplete beta complement inverse function. template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_derivative(RT1 a, RT2 b, RT3 x); // derivative of incomplete beta template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ibeta_derivative(RT1 a, RT2 b, RT3 x, const Policy& pol); // derivative of incomplete beta // Binomial: template - T binomial_coefficient(unsigned n, unsigned k, const Policy& pol); + BOOST_MATH_GPU_ENABLED T binomial_coefficient(unsigned n, unsigned k, const Policy& pol); template - T binomial_coefficient(unsigned n, unsigned k); + BOOST_MATH_GPU_ENABLED T binomial_coefficient(unsigned n, unsigned k); // erf & erfc error functions. template // Error function. - tools::promote_args_t erf(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erf(RT z); template // Error function. - tools::promote_args_t erf(RT z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erf(RT z, const Policy&); template // Error function complement. - tools::promote_args_t erfc(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erfc(RT z); template // Error function complement. - tools::promote_args_t erfc(RT z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erfc(RT z, const Policy&); template // Error function inverse. - tools::promote_args_t erf_inv(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erf_inv(RT z); template // Error function inverse. - tools::promote_args_t erf_inv(RT z, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erf_inv(RT z, const Policy& pol); template // Error function complement inverse. - tools::promote_args_t erfc_inv(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erfc_inv(RT z); template // Error function complement inverse. 
- tools::promote_args_t erfc_inv(RT z, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t erfc_inv(RT z, const Policy& pol); // Polynomials: template @@ -250,15 +331,15 @@ namespace boost laguerre(unsigned n, T1 m, T2 x); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t hermite(unsigned n, T x); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t hermite(unsigned n, T x, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t hermite_next(unsigned n, T1 x, T2 Hn, T3 Hnm1); template @@ -311,90 +392,90 @@ namespace boost // Elliptic integrals: template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rf(T1 x, T2 y, T3 z); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rf(T1 x, T2 y, T3 z, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rd(T1 x, T2 y, T3 z); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rd(T1 x, T2 y, T3 z, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rc(T1 x, T2 y); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rc(T1 x, T2 y, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rj(T1 x, T2 y, T3 z, T4 p); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rj(T1 x, T2 y, T3 z, T4 p, const Policy& pol); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rg(T1 x, T2 y, T3 z); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_rg(T1 x, T2 y, T3 z, const Policy& pol); template - tools::promote_args_t ellint_2(T k); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_2(T k); template - tools::promote_args_t ellint_2(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_2(T1 k, T2 phi); template - tools::promote_args_t ellint_2(T1 k, T2 phi, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_2(T1 k, T2 phi, const Policy& pol); template - tools::promote_args_t ellint_1(T k); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_1(T k); template - tools::promote_args_t ellint_1(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_1(T1 k, T2 phi); template - tools::promote_args_t ellint_1(T1 k, T2 phi, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_1(T1 k, T2 phi, const Policy& pol); template - tools::promote_args_t ellint_d(T k); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_d(T k); template - tools::promote_args_t ellint_d(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_d(T1 k, T2 phi); template - tools::promote_args_t ellint_d(T1 k, T2 phi, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_d(T1 k, T2 phi, const Policy& pol); template - tools::promote_args_t jacobi_zeta(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED tools::promote_args_t jacobi_zeta(T1 k, T2 phi); template - tools::promote_args_t jacobi_zeta(T1 k, T2 phi, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t jacobi_zeta(T1 k, T2 phi, const Policy& pol); template - tools::promote_args_t heuman_lambda(T1 k, T2 phi); + BOOST_MATH_GPU_ENABLED tools::promote_args_t heuman_lambda(T1 k, T2 phi); template - tools::promote_args_t 
heuman_lambda(T1 k, T2 phi, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t heuman_lambda(T1 k, T2 phi, const Policy& pol); namespace detail{ template struct ellint_3_result { - using type = typename std::conditional< + using type = typename boost::math::conditional< policies::is_policy::value, tools::promote_args_t, tools::promote_args_t @@ -405,28 +486,28 @@ namespace boost template - typename detail::ellint_3_result::type ellint_3(T1 k, T2 v, T3 phi); + BOOST_MATH_GPU_ENABLED typename detail::ellint_3_result::type ellint_3(T1 k, T2 v, T3 phi); template - tools::promote_args_t ellint_3(T1 k, T2 v, T3 phi, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_3(T1 k, T2 v, T3 phi, const Policy& pol); template - tools::promote_args_t ellint_3(T1 k, T2 v); + BOOST_MATH_GPU_ENABLED tools::promote_args_t ellint_3(T1 k, T2 v); // Factorial functions. // Note: not for integral types, at present. template struct max_factorial; template - RT factorial(unsigned int); + BOOST_MATH_GPU_ENABLED RT factorial(unsigned int); template - RT factorial(unsigned int, const Policy& pol); + BOOST_MATH_GPU_ENABLED RT factorial(unsigned int, const Policy& pol); template - RT unchecked_factorial(unsigned int BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(RT)); + BOOST_MATH_GPU_ENABLED RT unchecked_factorial(unsigned int BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(RT)); template - RT double_factorial(unsigned i); + BOOST_MATH_GPU_ENABLED RT double_factorial(unsigned i); template - RT double_factorial(unsigned i, const Policy& pol); + BOOST_MATH_GPU_ENABLED RT double_factorial(unsigned i, const Policy& pol); template tools::promote_args_t falling_factorial(RT x, unsigned n); @@ -442,106 +523,106 @@ namespace boost // Gamma functions. template - tools::promote_args_t tgamma(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma(RT z); template - tools::promote_args_t tgamma1pm1(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma1pm1(RT z); template - tools::promote_args_t tgamma1pm1(RT z, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma1pm1(RT z, const Policy& pol); template - tools::promote_args_t tgamma(RT1 a, RT2 z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma(RT1 a, RT2 z); template - tools::promote_args_t tgamma(RT1 a, RT2 z, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma(RT1 a, RT2 z, const Policy& pol); template - tools::promote_args_t lgamma(RT z, int* sign); + BOOST_MATH_GPU_ENABLED tools::promote_args_t lgamma(RT z, int* sign); template - tools::promote_args_t lgamma(RT z, int* sign, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t lgamma(RT z, int* sign, const Policy& pol); template - tools::promote_args_t lgamma(RT x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t lgamma(RT x); template - tools::promote_args_t lgamma(RT x, const Policy& pol); + BOOST_MATH_GPU_ENABLED tools::promote_args_t lgamma(RT x, const Policy& pol); template - tools::promote_args_t tgamma_lower(RT1 a, RT2 z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma_lower(RT1 a, RT2 z); template - tools::promote_args_t tgamma_lower(RT1 a, RT2 z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma_lower(RT1 a, RT2 z, const Policy&); template - tools::promote_args_t gamma_q(RT1 a, RT2 z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_q(RT1 a, RT2 z); template - tools::promote_args_t gamma_q(RT1 a, RT2 z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_q(RT1 a, RT2 z, 
const Policy&); template - tools::promote_args_t gamma_p(RT1 a, RT2 z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p(RT1 a, RT2 z); template - tools::promote_args_t gamma_p(RT1 a, RT2 z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p(RT1 a, RT2 z, const Policy&); template - tools::promote_args_t tgamma_delta_ratio(T1 z, T2 delta); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma_delta_ratio(T1 z, T2 delta); template - tools::promote_args_t tgamma_delta_ratio(T1 z, T2 delta, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma_delta_ratio(T1 z, T2 delta, const Policy&); template - tools::promote_args_t tgamma_ratio(T1 a, T2 b); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma_ratio(T1 a, T2 b); template - tools::promote_args_t tgamma_ratio(T1 a, T2 b, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t tgamma_ratio(T1 a, T2 b, const Policy&); template - tools::promote_args_t gamma_p_derivative(T1 a, T2 x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p_derivative(T1 a, T2 x); template - tools::promote_args_t gamma_p_derivative(T1 a, T2 x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p_derivative(T1 a, T2 x, const Policy&); // gamma inverse. template - tools::promote_args_t gamma_p_inv(T1 a, T2 p); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p_inv(T1 a, T2 p); template - tools::promote_args_t gamma_p_inva(T1 a, T2 p, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p_inva(T1 a, T2 p, const Policy&); template - tools::promote_args_t gamma_p_inva(T1 a, T2 p); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p_inva(T1 a, T2 p); template - tools::promote_args_t gamma_p_inv(T1 a, T2 p, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_p_inv(T1 a, T2 p, const Policy&); template - tools::promote_args_t gamma_q_inv(T1 a, T2 q); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_q_inv(T1 a, T2 q); template - tools::promote_args_t gamma_q_inv(T1 a, T2 q, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_q_inv(T1 a, T2 q, const Policy&); template - tools::promote_args_t gamma_q_inva(T1 a, T2 q); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_q_inva(T1 a, T2 q); template - tools::promote_args_t gamma_q_inva(T1 a, T2 q, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t gamma_q_inva(T1 a, T2 q, const Policy&); // digamma: template - tools::promote_args_t digamma(T x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t digamma(T x); template - tools::promote_args_t digamma(T x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t digamma(T x, const Policy&); // trigamma: template - tools::promote_args_t trigamma(T x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t trigamma(T x); template - tools::promote_args_t trigamma(T x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t trigamma(T x, const Policy&); // polygamma: template @@ -552,63 +633,63 @@ namespace boost // Hypotenuse function sqrt(x ^ 2 + y ^ 2). template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t hypot(T1 x, T2 y); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t hypot(T1 x, T2 y, const Policy&); // cbrt - cube root. 
template - tools::promote_args_t cbrt(RT z); + BOOST_MATH_GPU_ENABLED tools::promote_args_t cbrt(RT z); template - tools::promote_args_t cbrt(RT z, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t cbrt(RT z, const Policy&); // log1p is log(x + 1) template - tools::promote_args_t log1p(T); + BOOST_MATH_GPU_ENABLED tools::promote_args_t log1p(T); template - tools::promote_args_t log1p(T, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t log1p(T, const Policy&); // log1pmx is log(x + 1) - x template - tools::promote_args_t log1pmx(T); + BOOST_MATH_GPU_ENABLED tools::promote_args_t log1pmx(T); template - tools::promote_args_t log1pmx(T, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t log1pmx(T, const Policy&); // Exp (x) minus 1 functions. template - tools::promote_args_t expm1(T); + BOOST_MATH_GPU_ENABLED tools::promote_args_t expm1(T); template - tools::promote_args_t expm1(T, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t expm1(T, const Policy&); // Power - 1 template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t powm1(const T1 a, const T2 z); template - tools::promote_args_t + BOOST_MATH_GPU_ENABLED tools::promote_args_t powm1(const T1 a, const T2 z, const Policy&); // sqrt(1+x) - 1 template - tools::promote_args_t sqrt1pm1(const T& val); + BOOST_MATH_GPU_ENABLED tools::promote_args_t sqrt1pm1(const T& val); template - tools::promote_args_t sqrt1pm1(const T& val, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t sqrt1pm1(const T& val, const Policy&); // sinus cardinals: template - tools::promote_args_t sinc_pi(T x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t sinc_pi(T x); template - tools::promote_args_t sinc_pi(T x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t sinc_pi(T x, const Policy&); template tools::promote_args_t sinhc_pi(T x); @@ -630,43 +711,43 @@ namespace boost tools::promote_args_t acosh(T x, const Policy&); template - tools::promote_args_t atanh(T x); + BOOST_MATH_GPU_ENABLED tools::promote_args_t atanh(T x); template - tools::promote_args_t atanh(T x, const Policy&); + BOOST_MATH_GPU_ENABLED tools::promote_args_t atanh(T x, const Policy&); namespace detail{ - typedef std::integral_constant bessel_no_int_tag; // No integer optimisation possible. - typedef std::integral_constant bessel_maybe_int_tag; // Maybe integer optimisation. - typedef std::integral_constant bessel_int_tag; // Definite integer optimisation. + typedef boost::math::integral_constant bessel_no_int_tag; // No integer optimisation possible. + typedef boost::math::integral_constant bessel_maybe_int_tag; // Maybe integer optimisation. + typedef boost::math::integral_constant bessel_int_tag; // Definite integer optimisation. 
template struct bessel_traits { - using result_type = typename std::conditional< - std::is_integral::value, + using result_type = typename boost::math::conditional< + boost::math::is_integral::value, typename tools::promote_args::type, tools::promote_args_t >::type; typedef typename policies::precision::type precision_type; - using optimisation_tag = typename std::conditional< + using optimisation_tag = typename boost::math::conditional< (precision_type::value <= 0 || precision_type::value > 64), bessel_no_int_tag, - typename std::conditional< - std::is_integral::value, + typename boost::math::conditional< + boost::math::is_integral::value, bessel_int_tag, bessel_maybe_int_tag >::type >::type; - using optimisation_tag128 = typename std::conditional< + using optimisation_tag128 = typename boost::math::conditional< (precision_type::value <= 0 || precision_type::value > 113), bessel_no_int_tag, - typename std::conditional< - std::is_integral::value, + typename boost::math::conditional< + boost::math::is_integral::value, bessel_int_tag, bessel_maybe_int_tag >::type @@ -676,223 +757,225 @@ namespace boost // Bessel functions: template - typename detail::bessel_traits::result_type cyl_bessel_j(T1 v, T2 x, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits::result_type cyl_bessel_j(T1 v, T2 x, const Policy& pol); template - typename detail::bessel_traits::result_type cyl_bessel_j_prime(T1 v, T2 x, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits::result_type cyl_bessel_j_prime(T1 v, T2 x, const Policy& pol); template - typename detail::bessel_traits >::result_type cyl_bessel_j(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits >::result_type cyl_bessel_j(T1 v, T2 x); template - typename detail::bessel_traits >::result_type cyl_bessel_j_prime(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits >::result_type cyl_bessel_j_prime(T1 v, T2 x); template - typename detail::bessel_traits::result_type sph_bessel(unsigned v, T x, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits::result_type sph_bessel(unsigned v, T x, const Policy& pol); template - typename detail::bessel_traits::result_type sph_bessel_prime(unsigned v, T x, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits::result_type sph_bessel_prime(unsigned v, T x, const Policy& pol); template - typename detail::bessel_traits >::result_type sph_bessel(unsigned v, T x); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits >::result_type sph_bessel(unsigned v, T x); template - typename detail::bessel_traits >::result_type sph_bessel_prime(unsigned v, T x); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits >::result_type sph_bessel_prime(unsigned v, T x); template - typename detail::bessel_traits::result_type cyl_bessel_i(T1 v, T2 x, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits::result_type cyl_bessel_i(T1 v, T2 x, const Policy& pol); template - typename detail::bessel_traits::result_type cyl_bessel_i_prime(T1 v, T2 x, const Policy& pol); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits::result_type cyl_bessel_i_prime(T1 v, T2 x, const Policy& pol); template - typename detail::bessel_traits >::result_type cyl_bessel_i(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits >::result_type cyl_bessel_i(T1 v, T2 x); template - typename detail::bessel_traits >::result_type cyl_bessel_i_prime(T1 v, T2 x); + BOOST_MATH_GPU_ENABLED typename detail::bessel_traits 
   template <class T1, class T2, class Policy>
-   typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_bessel_k(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_bessel_k(T1 v, T2 x, const Policy& pol);
   template <class T1, class T2, class Policy>
-   typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_bessel_k_prime(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_bessel_k_prime(T1 v, T2 x, const Policy& pol);

   template <class T1, class T2>
-   typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_bessel_k(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_bessel_k(T1 v, T2 x);
   template <class T1, class T2>
-   typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_bessel_k_prime(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_bessel_k_prime(T1 v, T2 x);

   template <class T1, class T2, class Policy>
-   typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_neumann(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_neumann(T1 v, T2 x, const Policy& pol);
   template <class T1, class T2, class Policy>
-   typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_neumann_prime(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, Policy>::result_type cyl_neumann_prime(T1 v, T2 x, const Policy& pol);

   template <class T1, class T2>
-   typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_neumann(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_neumann(T1 v, T2 x);
   template <class T1, class T2>
-   typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_neumann_prime(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type cyl_neumann_prime(T1 v, T2 x);

   template <class T, class Policy>
-   typename detail::bessel_traits<T, T, Policy>::result_type sph_neumann(unsigned v, T x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, Policy>::result_type sph_neumann(unsigned v, T x, const Policy& pol);
   template <class T, class Policy>
-   typename detail::bessel_traits<T, T, Policy>::result_type sph_neumann_prime(unsigned v, T x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, Policy>::result_type sph_neumann_prime(unsigned v, T x, const Policy& pol);

   template <class T>
-   typename detail::bessel_traits<T, T, policies::policy<> >::result_type sph_neumann(unsigned v, T x);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, policies::policy<> >::result_type sph_neumann(unsigned v, T x);
   template <class T>
-   typename detail::bessel_traits<T, T, policies::policy<> >::result_type sph_neumann_prime(unsigned v, T x);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, policies::policy<> >::result_type sph_neumann_prime(unsigned v, T x);

   template <class T, class Policy>
-   typename detail::bessel_traits<T, T, Policy>::result_type cyl_bessel_j_zero(T v, int m, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, Policy>::result_type cyl_bessel_j_zero(T v, int m, const Policy& pol);

   template <class T>
-   typename detail::bessel_traits<T, T, policies::policy<> >::result_type cyl_bessel_j_zero(T v, int m);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, policies::policy<> >::result_type cyl_bessel_j_zero(T v, int m);

   template <class T, class OutputIterator>
-   OutputIterator cyl_bessel_j_zero(T v,
+   BOOST_MATH_GPU_ENABLED OutputIterator cyl_bessel_j_zero(T v,
                          int start_index,
                          unsigned number_of_zeros,
                          OutputIterator out_it);

   template <class T, class OutputIterator, class Policy>
-   OutputIterator cyl_bessel_j_zero(T v,
+   BOOST_MATH_GPU_ENABLED OutputIterator cyl_bessel_j_zero(T v,
                          int start_index,
                          unsigned number_of_zeros,
                          OutputIterator out_it,
                          const Policy&);

   template <class T, class Policy>
-   typename detail::bessel_traits<T, T, Policy>::result_type cyl_neumann_zero(T v, int m, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, Policy>::result_type cyl_neumann_zero(T v, int m, const Policy& pol);

   template <class T>
-   typename detail::bessel_traits<T, T, policies::policy<> >::result_type cyl_neumann_zero(T v, int m);
+   BOOST_MATH_GPU_ENABLED typename detail::bessel_traits<T, T, policies::policy<> >::result_type cyl_neumann_zero(T v, int m);
   template <class T, class OutputIterator>
-   OutputIterator cyl_neumann_zero(T v,
+   BOOST_MATH_GPU_ENABLED OutputIterator cyl_neumann_zero(T v,
                          int start_index,
                          unsigned number_of_zeros,
                          OutputIterator out_it);

   template <class T, class OutputIterator, class Policy>
-   OutputIterator cyl_neumann_zero(T v,
+   BOOST_MATH_GPU_ENABLED OutputIterator cyl_neumann_zero(T v,
                          int start_index,
                          unsigned number_of_zeros,
                          OutputIterator out_it,
                          const Policy&);

   template <class T1, class T2>
-   std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_1(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_1(T1 v, T2 x);
   template <class T1, class T2, class Policy>
-   std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_1(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_1(T1 v, T2 x, const Policy& pol);

   template <class T1, class T2, class Policy>
-   std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_2(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_2(T1 v, T2 x, const Policy& pol);
   template <class T1, class T2>
-   std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_2(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> cyl_hankel_2(T1 v, T2 x);

   template <class T1, class T2, class Policy>
-   std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_1(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_1(T1 v, T2 x, const Policy& pol);
   template <class T1, class T2>
-   std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_1(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_1(T1 v, T2 x);

   template <class T1, class T2, class Policy>
-   std::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_2(T1 v, T2 x, const Policy& pol);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_2(T1 v, T2 x, const Policy& pol);
   template <class T1, class T2>
-   std::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_2(T1 v, T2 x);
+   BOOST_MATH_GPU_ENABLED boost::math::complex<typename detail::bessel_traits<T1, T2, policies::policy<> >::result_type> sph_hankel_2(T1 v, T2 x);

   template <class T, class Policy>
-   tools::promote_args_t<T> airy_ai(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_ai(T x, const Policy&);
   template <class T>
-   tools::promote_args_t<T> airy_ai(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_ai(T x);

   template <class T, class Policy>
-   tools::promote_args_t<T> airy_bi(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_bi(T x, const Policy&);
   template <class T>
-   tools::promote_args_t<T> airy_bi(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_bi(T x);

   template <class T, class Policy>
-   tools::promote_args_t<T> airy_ai_prime(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_ai_prime(T x, const Policy&);
   template <class T>
-   tools::promote_args_t<T> airy_ai_prime(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_ai_prime(T x);

   template <class T, class Policy>
-   tools::promote_args_t<T> airy_bi_prime(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_bi_prime(T x, const Policy&);
   template <class T>
-   tools::promote_args_t<T> airy_bi_prime(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> airy_bi_prime(T x);

   template <class T>
-   T airy_ai_zero(int m);
+   BOOST_MATH_GPU_ENABLED T airy_ai_zero(int m);
   template <class T, class Policy>
-   T airy_ai_zero(int m, const Policy&);
+   BOOST_MATH_GPU_ENABLED T airy_ai_zero(int m, const Policy&);

   template <class OutputIterator>
-   OutputIterator airy_ai_zero(
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_ai_zero(
                     int start_index,
                     unsigned number_of_zeros,
                     OutputIterator out_it);
   template <class OutputIterator, class Policy>
-   OutputIterator airy_ai_zero(
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_ai_zero(
                     int start_index,
                     unsigned number_of_zeros,
                     OutputIterator out_it,
                     const Policy&);

   template <class T>
-   T airy_bi_zero(int m);
+   BOOST_MATH_GPU_ENABLED T airy_bi_zero(int m);
   template <class T, class Policy>
-   T airy_bi_zero(int m, const Policy&);
+   BOOST_MATH_GPU_ENABLED T airy_bi_zero(int m, const Policy&);

   template <class OutputIterator>
-   OutputIterator airy_bi_zero(
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_bi_zero(
                     int start_index,
                     unsigned number_of_zeros,
                     OutputIterator out_it);
   template <class OutputIterator, class Policy>
-   OutputIterator airy_bi_zero(
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_bi_zero(
                     int start_index,
                     unsigned number_of_zeros,
                     OutputIterator out_it,
                     const Policy&);

   template <class T, class Policy>
-   tools::promote_args_t<T> sin_pi(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> sin_pi(T x, const Policy&);
   template <class T>
-   tools::promote_args_t<T> sin_pi(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> sin_pi(T x);

   template <class T, class Policy>
-   tools::promote_args_t<T> cos_pi(T x, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> cos_pi(T x, const Policy&);
   template <class T>
-   tools::promote_args_t<T> cos_pi(T x);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> cos_pi(T x);

   template <class T>
-   int fpclassify BOOST_NO_MACRO_EXPAND(T t);
+   BOOST_MATH_GPU_ENABLED int fpclassify BOOST_NO_MACRO_EXPAND(T t);
   template <class T>
-   bool isfinite BOOST_NO_MACRO_EXPAND(T z);
+   BOOST_MATH_GPU_ENABLED bool isfinite BOOST_NO_MACRO_EXPAND(T z);
   template <class T>
-   bool isinf BOOST_NO_MACRO_EXPAND(T t);
+   BOOST_MATH_GPU_ENABLED bool isinf BOOST_NO_MACRO_EXPAND(T t);
   template <class T>
-   bool isnan BOOST_NO_MACRO_EXPAND(T t);
+   BOOST_MATH_GPU_ENABLED bool isnan BOOST_NO_MACRO_EXPAND(T t);
   template <class T>
-   bool isnormal BOOST_NO_MACRO_EXPAND(T t);
+   BOOST_MATH_GPU_ENABLED bool isnormal BOOST_NO_MACRO_EXPAND(T t);

   template <class T>
-   int signbit BOOST_NO_MACRO_EXPAND(T x);
+   BOOST_MATH_GPU_ENABLED int signbit BOOST_NO_MACRO_EXPAND(T x);
   template <class T>
-   int sign BOOST_NO_MACRO_EXPAND(const T& z);
+   BOOST_MATH_GPU_ENABLED int sign BOOST_NO_MACRO_EXPAND(const T& z);
   template <class T, class U>
-   typename tools::promote_args_permissive<T, U>::type copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y);
+   BOOST_MATH_GPU_ENABLED typename tools::promote_args_permissive<T, U>::type
+      copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y);
   template <class T>
-   typename tools::promote_args_permissive<T>::type changesign BOOST_NO_MACRO_EXPAND(const T& z);
+   BOOST_MATH_GPU_ENABLED typename tools::promote_args_permissive<T>::type
+      changesign BOOST_NO_MACRO_EXPAND(const T& z);

   // Exponential integrals:
   namespace detail{

@@ -900,7 +983,7 @@ template <class T, class U>
   struct expint_result
   {
-      typedef typename std::conditional<
+      typedef typename boost::math::conditional<
         policies::is_policy<U>::value,
         tools::promote_args_t<T>,
         typename tools::promote_args<T, U>::type
@@ -910,13 +993,13 @@ namespace boost
   } // namespace detail

   template <class T, class Policy>
-   tools::promote_args_t<T> expint(unsigned n, T z, const Policy&);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> expint(unsigned n, T z, const Policy&);

   template <class T, class U>
-   typename detail::expint_result<T, U>::type expint(T const z, U const u);
+   BOOST_MATH_GPU_ENABLED typename detail::expint_result<T, U>::type expint(T const z, U const u);

   template <class T>
-   tools::promote_args_t<T> expint(T z);
+   BOOST_MATH_GPU_ENABLED tools::promote_args_t<T> expint(T z);

   // Zeta:
   template <class T, class Policy>
@@ -1087,10 +1170,10 @@ namespace boost

   // pow:
   template <int N, typename T, class Policy>
-   BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t<T> pow(T base, const Policy& policy);
+   BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t<T> pow(T base, const Policy& policy);

   template <int N, typename T>
-   BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t<T> pow(T base);
+   BOOST_MATH_GPU_ENABLED BOOST_MATH_CXX14_CONSTEXPR tools::promote_args_t<T> pow(T base);

   // next:
   template <class T, class U, class Policy>
@@ -1191,13 +1274,13 @@ namespace boost

 #define BOOST_MATH_DETAIL_LL_FUNC(Policy)\
   \
   template <class T>\
-   inline T modf(const T& v, long long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
   \
   template <class T>\
-   inline long long lltrunc(const T& v){ using boost::math::lltrunc; return lltrunc(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline long long lltrunc(const T& v){ using boost::math::lltrunc; return lltrunc(v, Policy()); }\
   \
   template <class T>\
-   inline long long llround(const T& v){ using boost::math::llround; return llround(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline long long llround(const T& v){ using boost::math::llround; return llround(v, Policy()); }\

 #  define BOOST_MATH_DETAIL_11_FUNC(Policy)\
   template <class T, class U, class V>\
@@ -1210,74 +1293,74 @@ namespace boost
    BOOST_MATH_DETAIL_11_FUNC(Policy)\
   \
   template <class RT1, class RT2>\
-   inline boost::math::tools::promote_args_t<RT1, RT2> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> \
   beta(RT1 a, RT2 b) { return ::boost::math::beta(a, b, Policy()); }\
   \
   template <class RT1, class RT2, class A>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, A> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, A> \
   beta(RT1 a, RT2 b, A x){ return ::boost::math::beta(a, b, x, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   betac(RT1 a, RT2 b, RT3 x) { return ::boost::math::betac(a, b, x, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibeta(RT1 a, RT2 b, RT3 x){ return ::boost::math::ibeta(a, b, x, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibetac(RT1 a, RT2 b, RT3 x){ return ::boost::math::ibetac(a, b, x, Policy()); }\
   \
   template <class T1, class T2, class T3, class T4>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3, T4> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3, T4> \
   ibeta_inv(T1 a, T2 b, T3 p, T4* py){ return ::boost::math::ibeta_inv(a, b, p, py, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibeta_inv(RT1 a, RT2 b, RT3 p){ return ::boost::math::ibeta_inv(a, b, p, Policy()); }\
   \
   template <class T1, class T2, class T3, class T4>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3, T4> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3, T4> \
   ibetac_inv(T1 a, T2 b, T3 q, T4* py){ return ::boost::math::ibetac_inv(a, b, q, py, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibeta_inva(RT1 a, RT2 b, RT3 p){ return ::boost::math::ibeta_inva(a, b, p, Policy()); }\
   \
   template <class T1, class T2, class T3>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3> \
   ibetac_inva(T1 a, T2 b, T3 q){ return ::boost::math::ibetac_inva(a, b, q, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibeta_invb(RT1 a, RT2 b, RT3 p){ return ::boost::math::ibeta_invb(a, b, p, Policy()); }\
   \
   template <class T1, class T2, class T3>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3> \
   ibetac_invb(T1 a, T2 b, T3 q){ return ::boost::math::ibetac_invb(a, b, q, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibetac_inv(RT1 a, RT2 b, RT3 q){ return ::boost::math::ibetac_inv(a, b, q, Policy()); }\
   \
   template <class RT1, class RT2, class RT3>\
-   inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2, RT3> \
   ibeta_derivative(RT1 a, RT2 b, RT3 x){ return ::boost::math::ibeta_derivative(a, b, x, Policy()); }\
   \
-   template <class T> T binomial_coefficient(unsigned n, unsigned k){ return ::boost::math::binomial_coefficient<T>(n, k, Policy()); }\
+   template <class T> BOOST_MATH_GPU_ENABLED T binomial_coefficient(unsigned n, unsigned k){ return ::boost::math::binomial_coefficient<T>(n, k, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> erf(RT z) { return ::boost::math::erf(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> erf(RT z) { return ::boost::math::erf(z, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> erfc(RT z){ return ::boost::math::erfc(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> erfc(RT z){ return ::boost::math::erfc(z, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> erf_inv(RT z) { return ::boost::math::erf_inv(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> erf_inv(RT z) { return ::boost::math::erf_inv(z, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> erfc_inv(RT z){ return ::boost::math::erfc_inv(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> erfc_inv(RT z){ return ::boost::math::erfc_inv(z, Policy()); }\
   \
   using boost::math::legendre_next;\
   \
@@ -1310,7 +1393,7 @@ namespace boost
   laguerre(unsigned n, T1 m, T2 x) { return ::boost::math::laguerre(n, m, x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> \
   hermite(unsigned n, T x){ return ::boost::math::hermite(n, x, Policy()); }\
   \
   using boost::math::hermite_next;\
@@ -1345,145 +1428,145 @@ namespace boost
   spherical_harmonic_i(unsigned n, int m, T1 theta, T2 phi, const Policy& pol);\
   \
   template <class T1, class T2, class T3>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3> \
   ellint_rf(T1 x, T2 y, T3 z){ return ::boost::math::ellint_rf(x, y, z, Policy()); }\
   \
   template <class T1, class T2, class T3>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3> \
   ellint_rd(T1 x, T2 y, T3 z){ return ::boost::math::ellint_rd(x, y, z, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> \
   ellint_rc(T1 x, T2 y){ return ::boost::math::ellint_rc(x, y, Policy()); }\
   \
   template <class T1, class T2, class T3, class T4>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3, T4> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3, T4> \
   ellint_rj(T1 x, T2 y, T3 z, T4 p){ return boost::math::ellint_rj(x, y, z, p, Policy()); }\
   \
   template <class T1, class T2, class T3>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3> \
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3> \
   ellint_rg(T1 x, T2 y, T3 z){ return ::boost::math::ellint_rg(x, y, z, Policy()); }\
   \
   template <typename T>\
-   inline boost::math::tools::promote_args_t<T> ellint_2(T k){ return boost::math::ellint_2(k, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> ellint_2(T k){ return boost::math::ellint_2(k, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> ellint_2(T1 k, T2 phi){ return boost::math::ellint_2(k, phi, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> ellint_2(T1 k, T2 phi){ return boost::math::ellint_2(k, phi, Policy()); }\
   \
   template <typename T>\
-   inline boost::math::tools::promote_args_t<T> ellint_d(T k){ return boost::math::ellint_d(k, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> ellint_d(T k){ return boost::math::ellint_d(k, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> ellint_d(T1 k, T2 phi){ return boost::math::ellint_d(k, phi, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> ellint_d(T1 k, T2 phi){ return boost::math::ellint_d(k, phi, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> jacobi_zeta(T1 k, T2 phi){ return boost::math::jacobi_zeta(k, phi, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> jacobi_zeta(T1 k, T2 phi){ return boost::math::jacobi_zeta(k, phi, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> heuman_lambda(T1 k, T2 phi){ return boost::math::heuman_lambda(k, phi, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> heuman_lambda(T1 k, T2 phi){ return boost::math::heuman_lambda(k, phi, Policy()); }\
   \
   template <typename T>\
-   inline boost::math::tools::promote_args_t<T> ellint_1(T k){ return boost::math::ellint_1(k, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> ellint_1(T k){ return boost::math::ellint_1(k, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> ellint_1(T1 k, T2 phi){ return boost::math::ellint_1(k, phi, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> ellint_1(T1 k, T2 phi){ return boost::math::ellint_1(k, phi, Policy()); }\
   \
   template <class T1, class T2, class T3>\
-   inline boost::math::tools::promote_args_t<T1, T2, T3> ellint_3(T1 k, T2 v, T3 phi){ return boost::math::ellint_3(k, v, phi, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2, T3> ellint_3(T1 k, T2 v, T3 phi){ return boost::math::ellint_3(k, v, phi, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> ellint_3(T1 k, T2 v){ return boost::math::ellint_3(k, v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> ellint_3(T1 k, T2 v){ return boost::math::ellint_3(k, v, Policy()); }\
   \
   using boost::math::max_factorial;\
   template <class RT>\
-   inline RT factorial(unsigned int i) { return boost::math::factorial<RT>(i, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline RT factorial(unsigned int i) { return boost::math::factorial<RT>(i, Policy()); }\
   using boost::math::unchecked_factorial;\
   template <class RT>\
-   inline RT double_factorial(unsigned i){ return boost::math::double_factorial<RT>(i, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline RT double_factorial(unsigned i){ return boost::math::double_factorial<RT>(i, Policy()); }\
   template <class RT>\
   inline boost::math::tools::promote_args_t<RT> falling_factorial(RT x, unsigned n){ return boost::math::falling_factorial(x, n, Policy()); }\
   template <class RT>\
   inline boost::math::tools::promote_args_t<RT> rising_factorial(RT x, unsigned n){ return boost::math::rising_factorial(x, n, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> tgamma(RT z){ return boost::math::tgamma(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> tgamma(RT z){ return boost::math::tgamma(z, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> tgamma1pm1(RT z){ return boost::math::tgamma1pm1(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> tgamma1pm1(RT z){ return boost::math::tgamma1pm1(z, Policy()); }\
   \
   template <class RT1, class RT2>\
-   inline boost::math::tools::promote_args_t<RT1, RT2> tgamma(RT1 a, RT2 z){ return boost::math::tgamma(a, z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> tgamma(RT1 a, RT2 z){ return boost::math::tgamma(a, z, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> lgamma(RT z, int* sign){ return boost::math::lgamma(z, sign, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> lgamma(RT z, int* sign){ return boost::math::lgamma(z, sign, Policy()); }\
   \
   template <class RT>\
-   inline boost::math::tools::promote_args_t<RT> lgamma(RT x){ return boost::math::lgamma(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT> lgamma(RT x){ return boost::math::lgamma(x, Policy()); }\
   \
   template <class RT1, class RT2>\
-   inline boost::math::tools::promote_args_t<RT1, RT2> tgamma_lower(RT1 a, RT2 z){ return boost::math::tgamma_lower(a, z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> tgamma_lower(RT1 a, RT2 z){ return boost::math::tgamma_lower(a, z, Policy()); }\
   \
   template <class RT1, class RT2>\
-   inline boost::math::tools::promote_args_t<RT1, RT2> gamma_q(RT1 a, RT2 z){ return boost::math::gamma_q(a, z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> gamma_q(RT1 a, RT2 z){ return boost::math::gamma_q(a, z, Policy()); }\
   \
   template <class RT1, class RT2>\
-   inline boost::math::tools::promote_args_t<RT1, RT2> gamma_p(RT1 a, RT2 z){ return boost::math::gamma_p(a, z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<RT1, RT2> gamma_p(RT1 a, RT2 z){ return boost::math::gamma_p(a, z, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> tgamma_delta_ratio(T1 z, T2 delta){ return boost::math::tgamma_delta_ratio(z, delta, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> tgamma_delta_ratio(T1 z, T2 delta){ return boost::math::tgamma_delta_ratio(z, delta, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> tgamma_ratio(T1 a, T2 b) { return boost::math::tgamma_ratio(a, b, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> tgamma_ratio(T1 a, T2 b) { return boost::math::tgamma_ratio(a, b, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> gamma_p_derivative(T1 a, T2 x){ return boost::math::gamma_p_derivative(a, x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> gamma_p_derivative(T1 a, T2 x){ return boost::math::gamma_p_derivative(a, x, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> gamma_p_inv(T1 a, T2 p){ return boost::math::gamma_p_inv(a, p, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> gamma_p_inv(T1 a, T2 p){ return boost::math::gamma_p_inv(a, p, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> gamma_p_inva(T1 a, T2 p){ return boost::math::gamma_p_inva(a, p, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> gamma_p_inva(T1 a, T2 p){ return boost::math::gamma_p_inva(a, p, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> gamma_q_inv(T1 a, T2 q){ return boost::math::gamma_q_inv(a, q, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> gamma_q_inv(T1 a, T2 q){ return boost::math::gamma_q_inv(a, q, Policy()); }\
   \
   template <class T1, class T2>\
-   inline boost::math::tools::promote_args_t<T1, T2> gamma_q_inva(T1 a, T2 q){ return boost::math::gamma_q_inva(a, q, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T1, T2> gamma_q_inva(T1 a, T2 q){ return boost::math::gamma_q_inva(a, q, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> digamma(T x){ return boost::math::digamma(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> digamma(T x){ return boost::math::digamma(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> trigamma(T x){ return boost::math::trigamma(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> trigamma(T x){ return boost::math::trigamma(x, Policy()); }\
   \
   template <class T>\
   inline boost::math::tools::promote_args_t<T> polygamma(int n, T x){ return boost::math::polygamma(n, x, Policy()); }\
   \
   template <class T1, class T2>\
   inline boost::math::tools::promote_args_t<T1, T2> \
-   hypot(T1 x, T2 y){ return boost::math::hypot(x, y, Policy()); }\
+   BOOST_MATH_GPU_ENABLED hypot(T1 x, T2 y){ return boost::math::hypot(x, y, Policy()); }\
   \
   template <class RT>\
   inline boost::math::tools::promote_args_t<RT> cbrt(RT z){ return boost::math::cbrt(z, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> log1p(T x){ return boost::math::log1p(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> log1p(T x){ return boost::math::log1p(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> log1pmx(T x){ return boost::math::log1pmx(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> log1pmx(T x){ return boost::math::log1pmx(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> expm1(T x){ return boost::math::expm1(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> expm1(T x){ return boost::math::expm1(x, Policy()); }\
   \
   template <class T1, class T2>\
   inline boost::math::tools::promote_args_t<T1, T2> \
-   powm1(const T1 a, const T2 z){ return boost::math::powm1(a, z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED powm1(const T1 a, const T2 z){ return boost::math::powm1(a, z, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> sqrt1pm1(const T& val){ return boost::math::sqrt1pm1(val, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> sqrt1pm1(const T& val){ return boost::math::sqrt1pm1(val, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> sinc_pi(T x){ return boost::math::sinc_pi(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> sinc_pi(T x){ return boost::math::sinc_pi(x, Policy()); }\
   \
   template <class T>\
   inline boost::math::tools::promote_args_t<T> sinhc_pi(T x){ return boost::math::sinhc_pi(x, Policy()); }\
@@ -1495,7 +1578,7 @@ namespace boost
   inline boost::math::tools::promote_args_t<T> acosh(const T x){ return boost::math::acosh(x, Policy()); }\
   \
   template<typename T>\
-   inline boost::math::tools::promote_args_t<T> atanh(const T x){ return boost::math::atanh(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> atanh(const T x){ return boost::math::atanh(x, Policy()); }\
   \
   template <class T1, class T2>\
   inline typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type cyl_bessel_j(T1 v, T2 x)\
@@ -1568,10 +1651,10 @@ template <class T, class OutputIterator>\
   { boost::math::cyl_neumann_zero(v, start_index, number_of_zeros, out_it, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> sin_pi(T x){ return boost::math::sin_pi(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> sin_pi(T x){ return boost::math::sin_pi(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> cos_pi(T x){ return boost::math::cos_pi(x, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> cos_pi(T x){ return boost::math::cos_pi(x, Policy()); }\
   \
   using boost::math::fpclassify;\
   using boost::math::isfinite;\
@@ -1584,44 +1667,44 @@ template <class OutputIterator, class Policy>\
   using boost::math::changesign;\
   \
   template <class T, class U>\
-   inline typename boost::math::tools::promote_args_t<T, U> expint(T const& z, U const& u)\
+   BOOST_MATH_GPU_ENABLED inline typename boost::math::tools::promote_args_t<T, U> expint(T const& z, U const& u)\
   { return boost::math::expint(z, u, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> expint(T z){ return boost::math::expint(z, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> expint(T z){ return boost::math::expint(z, Policy()); }\
   \
   template <class T>\
   inline boost::math::tools::promote_args_t<T> zeta(T s){ return boost::math::zeta(s, Policy()); }\
   \
   template <class T>\
-   inline T round(const T& v){ using boost::math::round; return round(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T round(const T& v){ using boost::math::round; return round(v, Policy()); }\
   \
   template <class T>\
-   inline int iround(const T& v){ using boost::math::iround; return iround(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline int iround(const T& v){ using boost::math::iround; return iround(v, Policy()); }\
   \
   template <class T>\
-   inline long lround(const T& v){ using boost::math::lround; return lround(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline long lround(const T& v){ using boost::math::lround; return lround(v, Policy()); }\
   \
   template <class T>\
-   inline T trunc(const T& v){ using boost::math::trunc; return trunc(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T trunc(const T& v){ using boost::math::trunc; return trunc(v, Policy()); }\
   \
   template <class T>\
-   inline int itrunc(const T& v){ using boost::math::itrunc; return itrunc(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline int itrunc(const T& v){ using boost::math::itrunc; return itrunc(v, Policy()); }\
   \
   template <class T>\
-   inline long ltrunc(const T& v){ using boost::math::ltrunc; return ltrunc(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline long ltrunc(const T& v){ using boost::math::ltrunc; return ltrunc(v, Policy()); }\
   \
   template <class T>\
-   inline T modf(const T& v, T* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T modf(const T& v, T* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
   \
   template <class T>\
-   inline T modf(const T& v, int* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T modf(const T& v, int* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
   \
   template <class T>\
-   inline T modf(const T& v, long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart){ using boost::math::modf; return modf(v, ipart, Policy()); }\
   \
   template <int N, class T>\
-   inline boost::math::tools::promote_args_t<T> pow(T v){ return boost::math::pow<N>(v, Policy()); }\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> pow(T v){ return boost::math::pow<N>(v, Policy()); }\
   \
   template <class T> T nextafter(const T& a, const T& b){ return static_cast<T>(boost::math::nextafter(a, b, Policy())); }\
   template <class T> T float_next(const T& a){ return static_cast<T>(boost::math::float_next(a, Policy())); }\
@@ -1633,19 +1716,19 @@ template <class RT1, class RT2>\
   inline boost::math::tools::promote_args_t<RT1, RT2> owens_t(RT1 a, RT2 z){ return boost::math::owens_t(a, z, Policy()); }\
   \
   template <class T1, class T2>\
-   inline std::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_1(T1 v, T2 x)\
+   inline BOOST_MATH_GPU_ENABLED boost::math::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_1(T1 v, T2 x)\
   { return boost::math::cyl_hankel_1(v, x, Policy()); }\
   \
   template <class T1, class T2>\
-   inline std::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_2(T1 v, T2 x)\
+   inline BOOST_MATH_GPU_ENABLED boost::math::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> cyl_hankel_2(T1 v, T2 x)\
   { return boost::math::cyl_hankel_2(v, x, Policy()); }\
   \
   template <class T1, class T2>\
-   inline std::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_1(T1 v, T2 x)\
+   inline BOOST_MATH_GPU_ENABLED boost::math::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_1(T1 v, T2 x)\
   { return boost::math::sph_hankel_1(v, x, Policy()); }\
   \
   template <class T1, class T2>\
-   inline std::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_2(T1 v, T2 x)\
+   inline BOOST_MATH_GPU_ENABLED boost::math::complex<typename boost::math::detail::bessel_traits<T1, T2, Policy>::result_type> sph_hankel_2(T1 v, T2 x)\
   { return boost::math::sph_hankel_2(v, x, Policy()); }\
   \
   template <class T>\
@@ -1749,33 +1832,33 @@ template <class T>\
   { return boost::math::jacobi_theta4m1tau(z, q, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> airy_ai(T x)\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> airy_ai(T x)\
   { return boost::math::airy_ai(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> airy_bi(T x)\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> airy_bi(T x)\
   { return boost::math::airy_bi(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> airy_ai_prime(T x)\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> airy_ai_prime(T x)\
   { return boost::math::airy_ai_prime(x, Policy()); }\
   \
   template <class T>\
-   inline boost::math::tools::promote_args_t<T> airy_bi_prime(T x)\
+   BOOST_MATH_GPU_ENABLED inline boost::math::tools::promote_args_t<T> airy_bi_prime(T x)\
   { return boost::math::airy_bi_prime(x, Policy()); }\
   \
   template <class T>\
-   inline T airy_ai_zero(int m)\
+   BOOST_MATH_GPU_ENABLED inline T airy_ai_zero(int m)\
   { return boost::math::airy_ai_zero<T>(m, Policy()); }\
   template <class OutputIterator>\
-   OutputIterator airy_ai_zero(int start_index, unsigned number_of_zeros, OutputIterator out_it)\
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_ai_zero(int start_index, unsigned number_of_zeros, OutputIterator out_it)\
   { return boost::math::airy_ai_zero(start_index, number_of_zeros, out_it, Policy()); }\
   \
   template <class T>\
-   inline T airy_bi_zero(int m)\
+   BOOST_MATH_GPU_ENABLED inline T airy_bi_zero(int m)\
   { return boost::math::airy_bi_zero<T>(m, Policy()); }\
   template <class OutputIterator>\
-   OutputIterator airy_bi_zero(int start_index, unsigned number_of_zeros, OutputIterator out_it)\
+   BOOST_MATH_GPU_ENABLED OutputIterator airy_bi_zero(int start_index, unsigned number_of_zeros, OutputIterator out_it)\
   { return boost::math::airy_bi_zero(start_index, number_of_zeros, out_it, Policy()); }\
   \
   template <class T>\
@@ -1813,6 +1896,6 @@ template <class T, class Policy>\
-
+#endif // BOOST_MATH_HAS_NVRTC

 #endif // BOOST_MATH_SPECIAL_MATH_FWD_HPP
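For orientation, a minimal sketch (not part of the patch) of what the BOOST_MATH_GPU_ENABLED forward declarations above buy you: the same boost::math call compiles for host and device. The kernel name and launch shape are illustrative only, and assume a CMake build configured with BOOST_MATH_ENABLE_CUDA:

    #include <boost/math/special_functions/expm1.hpp>

    // Each thread evaluates expm1 on one element; the GPU-enabled
    // declarations above are what allow this to compile as device code.
    __global__ void expm1_kernel(const double* in, double* out, int n)
    {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
        {
            out[i] = boost::math::expm1(in[i]);
        }
    }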
diff --git a/include/boost/math/special_functions/modf.hpp b/include/boost/math/special_functions/modf.hpp
index 75e6be9f46..6e372ec9a3 100644
--- a/include/boost/math/special_functions/modf.hpp
+++ b/include/boost/math/special_functions/modf.hpp
@@ -1,4 +1,5 @@
 //  Copyright John Maddock 2007.
+//  Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,56 +11,60 @@
 #pragma once
 #endif

-#include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/special_functions/trunc.hpp>
 #include <boost/math/special_functions/round.hpp>
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+#include <boost/math/special_functions/math_fwd.hpp>
+#endif

 namespace boost{ namespace math{

 template <class T, class Policy>
-inline T modf(const T& v, T* ipart, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, T* ipart, const Policy& pol)
 {
    *ipart = trunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, T* ipart)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, T* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }

 template <class T, class Policy>
-inline T modf(const T& v, int* ipart, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, int* ipart, const Policy& pol)
 {
    *ipart = itrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, int* ipart)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, int* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }

 template <class T, class Policy>
-inline T modf(const T& v, long* ipart, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart, const Policy& pol)
 {
    *ipart = ltrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, long* ipart)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }

 template <class T, class Policy>
-inline T modf(const T& v, long long* ipart, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long long* ipart, const Policy& pol)
 {
    *ipart = lltrunc(v, pol);
    return v - *ipart;
 }
 template <class T>
-inline T modf(const T& v, long long* ipart)
+BOOST_MATH_GPU_ENABLED inline T modf(const T& v, long long* ipart)
 {
    return modf(v, ipart, policies::policy<>());
 }
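A quick host-side sketch (illustrative, not part of the patch) of the overload set touched above: the integer part can be captured at four widths, each routed through the matching trunc variant:

    #include <boost/math/special_functions/modf.hpp>
    #include <iostream>

    int main()
    {
        double ip = 0;                                   // modf(v, T*) uses trunc
        const double frac = boost::math::modf(3.75, &ip);
        long long llip = 0;                              // modf(v, long long*) uses lltrunc
        const double frac2 = boost::math::modf(-3.75, &llip);
        std::cout << ip << ' ' << frac << ' '            // 3 0.75
                  << llip << ' ' << frac2 << '\n';       // -3 -0.75
    }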
diff --git a/include/boost/math/special_functions/next.hpp b/include/boost/math/special_functions/next.hpp
index 02a208e4eb..fd08162f98 100644
--- a/include/boost/math/special_functions/next.hpp
+++ b/include/boost/math/special_functions/next.hpp
@@ -10,6 +10,11 @@
 #pragma once
 #endif

+#include <boost/math/tools/config.hpp>
+
+// TODO(mborland): Need to remove recurrsion from these algos
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/policies/error_handling.hpp>
 #include <boost/math/special_functions/fpclassify.hpp>
@@ -920,4 +925,6 @@ inline typename tools::promote_args<T>::type float_advance(const T& val, int distance, const Policy& pol)

 }} // boost math namespaces

+#endif
+
 #endif // BOOST_MATH_SPECIAL_NEXT_HPP
diff --git a/include/boost/math/special_functions/pow.hpp b/include/boost/math/special_functions/pow.hpp
index 9c64889977..7a1bb14eba 100644
--- a/include/boost/math/special_functions/pow.hpp
+++ b/include/boost/math/special_functions/pow.hpp
@@ -2,6 +2,7 @@
 //   Computes a power with exponent known at compile-time
 //  (C) Copyright Bruno Lalande 2008.
+//  (C) Copyright Matt Borland 2024.
 //  Distributed under the Boost Software License, Version 1.0.
 //  (See accompanying file LICENSE_1_0.txt or copy at
 //  http://www.boost.org/LICENSE_1_0.txt)
@@ -12,12 +13,14 @@
 #ifndef BOOST_MATH_POW_HPP
 #define BOOST_MATH_POW_HPP

-
-#include <boost/math/special_functions/math_fwd.hpp>
+#include <boost/math/tools/config.hpp>
 #include <boost/math/policies/policy.hpp>
 #include <boost/math/policies/error_handling.hpp>
 #include <boost/math/tools/promotion.hpp>

+#ifndef BOOST_MATH_HAS_NVRTC
+#include <boost/math/special_functions/math_fwd.hpp>
+#endif

 namespace boost {
 namespace math {
@@ -34,7 +37,7 @@ template <int N, int M = N % 2>
 struct positive_power
 {
     template <typename T>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base)
+    BOOST_MATH_GPU_ENABLED static constexpr T result(T base)
     {
         T power = positive_power<N / 2>::result(base);
         return power * power;
@@ -45,7 +48,7 @@ template <int N>
 struct positive_power<N, 1>
 {
     template <typename T>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base)
+    BOOST_MATH_GPU_ENABLED static constexpr T result(T base)
     {
         T power = positive_power<N / 2>::result(base);
         return base * power * power;
@@ -56,7 +59,7 @@ template <>
 struct positive_power<1, 1>
 {
     template <typename T>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base){ return base; }
+    BOOST_MATH_GPU_ENABLED static constexpr T result(T base){ return base; }
 };

@@ -64,7 +67,7 @@ template <int N, bool>
 struct power_if_positive
 {
     template <typename T, class Policy>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy&)
+    BOOST_MATH_GPU_ENABLED static constexpr T result(T base, const Policy&)
     { return positive_power<N>::result(base); }
 };
@@ -72,7 +75,7 @@ template <int N>
 struct power_if_positive<N, false>
 {
     template <typename T, class Policy>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy)
+    BOOST_MATH_GPU_ENABLED static constexpr T result(T base, const Policy& policy)
     {
         if (base == 0)
         {
@@ -91,7 +94,7 @@ template <>
 struct power_if_positive<0, true>
 {
     template <typename T, class Policy>
-    static BOOST_MATH_CXX14_CONSTEXPR T result(T base, const Policy& policy)
+    BOOST_MATH_GPU_ENABLED static constexpr T result(T base, const Policy& policy)
     {
         if (base == 0)
         {
@@ -120,14 +123,14 @@ struct select_power_if_positive

 template <int N, typename T, class Policy>
-BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args<T>::type pow(T base, const Policy& policy)
+BOOST_MATH_GPU_ENABLED constexpr inline typename tools::promote_args<T>::type pow(T base, const Policy& policy)
 {
    using result_type = typename tools::promote_args<T>::type;
    return detail::select_power_if_positive<N>::type::result(static_cast<result_type>(base), policy);
 }

 template <int N, typename T>
-BOOST_MATH_CXX14_CONSTEXPR inline typename tools::promote_args<T>::type pow(T base)
+BOOST_MATH_GPU_ENABLED constexpr inline typename tools::promote_args<T>::type pow(T base)
 { return pow<N>(base, policies::policy<>()); }

 #ifdef _MSC_VER
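The pow changes above swap BOOST_MATH_CXX14_CONSTEXPR for plain constexpr alongside the GPU marker, so the compile-time-exponent pow stays usable in constant expressions. A small illustrative sketch (not from the patch):

    #include <boost/math/special_functions/pow.hpp>

    int main()
    {
        // The exponent is a template parameter, so positive_power<N>
        // unrolls the multiplication chain at compile time.
        constexpr double r = boost::math::pow<8>(2.0);
        static_assert(r == 256.0, "evaluated in a constant expression");
    }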
diff --git a/include/boost/math/special_functions/powm1.hpp b/include/boost/math/special_functions/powm1.hpp
index e52277b16d..80d02dc299 100644
--- a/include/boost/math/special_functions/powm1.hpp
+++ b/include/boost/math/special_functions/powm1.hpp
@@ -1,4 +1,5 @@
 //  (C) Copyright John Maddock 2006.
+//  (C) Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -12,6 +13,7 @@
 #pragma warning(disable:4702) // Unreachable code (release mode only warning)
 #endif

+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/log1p.hpp>
 #include <boost/math/special_functions/expm1.hpp>
 #include <boost/math/special_functions/trunc.hpp>
@@ -22,32 +24,23 @@ namespace boost{ namespace math{ namespace detail{

 template <class T, class Policy>
-inline T powm1_imp(const T x, const T y, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T powm1_imp(const T x, const T y, const Policy& pol)
 {
    BOOST_MATH_STD_USING

-   static const char* function = "boost::math::powm1<%1%>(%1%, %1%)";
-   if (x > 0)
+   constexpr auto function = "boost::math::powm1<%1%>(%1%, %1%)";
+
+   if ((fabs(y * (x - 1)) < T(0.5)) || (fabs(y) < T(0.2)))
    {
-      if ((fabs(y * (x - 1)) < T(0.5)) || (fabs(y) < T(0.2)))
-      {
-         // We don't have any good/quick approximation for log(x) * y
-         // so just try it and see:
-         T l = y * log(x);
-         if (l < T(0.5))
-            return boost::math::expm1(l, pol);
-         if (l > boost::math::tools::log_max_value<T>())
-            return boost::math::policies::raise_overflow_error<T>(function, nullptr, pol);
-         // fall through....
-      }
-   }
-   else if ((boost::math::signbit)(x)) // Need to error check -0 here as well
-   {
-      // y had better be an integer:
-      if (boost::math::trunc(y) != y)
-         return boost::math::policies::raise_domain_error<T>(function, "For non-integral exponent, expected base > 0 but got %1%", x, pol);
-      if (boost::math::trunc(y / 2) == y / 2)
-         return powm1_imp(T(-x), y, pol);
+      // We don't have any good/quick approximation for log(x) * y
+      // so just try it and see:
+      T l = y * log(x);
+      if (l < T(0.5))
+         return boost::math::expm1(l, pol);
+      if (l > boost::math::tools::log_max_value<T>())
+         return boost::math::policies::raise_overflow_error<T>(function, nullptr, pol);
+      // fall through....
    }
+
   T result = pow(x, y) - 1;
   if((boost::math::isinf)(result))
      return result < 0 ? -boost::math::policies::raise_overflow_error<T>(function, nullptr, pol) : boost::math::policies::raise_overflow_error<T>(function, nullptr, pol);
@@ -56,22 +49,41 @@ inline T powm1_imp(const T x, const T y, const Policy& pol)
   return result;
 }

+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED inline T powm1_imp_dispatch(const T x, const T y, const Policy& pol)
+{
+   BOOST_MATH_STD_USING
+
+   if ((boost::math::signbit)(x)) // Need to error check -0 here as well
+   {
+      constexpr auto function = "boost::math::powm1<%1%>(%1%, %1%)";
+
+      // y had better be an integer:
+      if (boost::math::trunc(y) != y)
+         return boost::math::policies::raise_domain_error<T>(function, "For non-integral exponent, expected base > 0 but got %1%", x, pol);
+      if (boost::math::trunc(y / 2) == y / 2)
+         return powm1_imp(T(-x), T(y), pol);
+   }
+
+   return powm1_imp(T(x), T(y), pol);
+}
+
 } // detail

 template <class T1, class T2>
-inline typename tools::promote_args<T1, T2>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T1, T2>::type
    powm1(const T1 a, const T2 z)
 {
    typedef typename tools::promote_args<T1, T2>::type result_type;
-   return detail::powm1_imp(static_cast<result_type>(a), static_cast<result_type>(z), policies::policy<>());
+   return detail::powm1_imp_dispatch(static_cast<result_type>(a), static_cast<result_type>(z), policies::policy<>());
 }

 template <class T1, class T2, class Policy>
-inline typename tools::promote_args<T1, T2>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T1, T2>::type
    powm1(const T1 a, const T2 z, const Policy& pol)
 {
    typedef typename tools::promote_args<T1, T2>::type result_type;
-   return detail::powm1_imp(static_cast<result_type>(a), static_cast<result_type>(z), pol);
+   return detail::powm1_imp_dispatch(static_cast<result_type>(a), static_cast<result_type>(z), pol);
 }

 } // namespace math
diff --git a/include/boost/math/special_functions/round.hpp b/include/boost/math/special_functions/round.hpp
index e74acba85b..bb99da7e31 100644
--- a/include/boost/math/special_functions/round.hpp
+++ b/include/boost/math/special_functions/round.hpp
@@ -12,6 +12,9 @@
 #endif

 #include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/policies/error_handling.hpp>
 #include <boost/math/special_functions/fpclassify.hpp>
@@ -30,7 +33,7 @@ namespace boost{ namespace math{ namespace detail{

 template <class T, class Policy>
-inline tools::promote_args_t<T> round(const T& v, const Policy& pol, const std::false_type&)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> round(const T& v, const Policy& pol, const std::false_type&)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
@@ -65,7 +68,7 @@ inline tools::promote_args_t<T> round(const T& v, const Policy& pol, const std::
    }
 }
 template <class T, class Policy>
-inline tools::promote_args_t<T> round(const T& v, const Policy&, const std::true_type&)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> round(const T& v, const Policy&, const std::true_type&)
 {
    return v;
 }
@@ -73,12 +76,12 @@
 } // namespace detail

 template <class T, class Policy>
-inline tools::promote_args_t<T> round(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> round(const T& v, const Policy& pol)
 {
    return detail::round(v, pol, std::integral_constant<bool, detail::is_integer_for_rounding<T>::value>());
 }
 template <class T>
-inline tools::promote_args_t<T> round(const T& v)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> round(const T& v)
 {
    return round(v, policies::policy<>());
 }
@@ -103,7 +106,7 @@ inline int iround(const T& v, const Policy& pol)

    result_type r = boost::math::round(v, pol);

-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<T>
    #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<T, BOOST_MATH_FLOAT128_TYPE>
@@ -127,7 +130,7 @@
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<int>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<int>::digits);

    if (r >= max_val || r < -max_val)
    {
@@ -138,20 +141,20 @@
    return static_cast<int>(r);
 }
 template <class T>
-inline int iround(const T& v)
+BOOST_MATH_GPU_ENABLED inline int iround(const T& v)
 {
    return iround(v, policies::policy<>());
 }

 template <class T, class Policy>
-inline long lround(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long lround(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;

    result_type r = boost::math::round(v, pol);

-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<T>
    #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<T, BOOST_MATH_FLOAT128_TYPE>
@@ -175,7 +178,7 @@
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long>::digits);

    if (r >= max_val || r < -max_val)
    {
@@ -186,20 +189,20 @@
    return static_cast<long>(r);
 }
 template <class T>
-inline long lround(const T& v)
+BOOST_MATH_GPU_ENABLED inline long lround(const T& v)
 {
    return lround(v, policies::policy<>());
 }

 template <class T, class Policy>
-inline long long llround(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long long llround(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = boost::math::tools::promote_args_t<T>;

    result_type r = boost::math::round(v, pol);

-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<T>
    #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<T, BOOST_MATH_FLOAT128_TYPE>
@@ -223,7 +226,7 @@
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long long>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long long>::digits);

    if (r >= max_val || r < -max_val)
    {
@@ -234,11 +237,117 @@
    return static_cast<long long>(r);
 }
 template <class T>
-inline long long llround(const T& v)
+BOOST_MATH_GPU_ENABLED inline long long llround(const T& v)
 {
    return llround(v, policies::policy<>());
 }

 }} // namespaces

+#else // Specialized NVRTC overloads
+
+namespace boost {
+namespace math {
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED T round(T x)
+{
+   return ::round(x);
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED float round(float x)
+{
+   return ::roundf(x);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED T round(T x, const Policy&)
+{
+   return ::round(x);
+}
+
+template <typename Policy>
+BOOST_MATH_GPU_ENABLED float round(float x, const Policy&)
+{
+   return ::roundf(x);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED int iround(T x)
+{
+   return static_cast<int>(::lround(x));
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED int iround(float x)
+{
+   return static_cast<int>(::lroundf(x));
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED int iround(T x, const Policy&)
+{
+   return static_cast<int>(::lround(x));
+}
+
+template <typename Policy>
+BOOST_MATH_GPU_ENABLED int iround(float x, const Policy&)
+{
+   return static_cast<int>(::lroundf(x));
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED long lround(T x)
+{
+   return ::lround(x);
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED long lround(float x)
+{
+   return ::lroundf(x);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED long lround(T x, const Policy&)
+{
+   return ::lround(x);
+}
+
+template <typename Policy>
+BOOST_MATH_GPU_ENABLED long lround(float x, const Policy&)
+{
+   return ::lroundf(x);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED long long llround(T x)
+{
+   return ::llround(x);
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED long long llround(float x)
+{
+   return ::llroundf(x);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED long long llround(T x, const Policy&)
+{
+   return ::llround(x);
+}
+
+template <typename Policy>
+BOOST_MATH_GPU_ENABLED long long llround(float x, const Policy&)
+{
+   return ::llroundf(x);
+}
+
+} // Namespace math
+} // Namespace boost
+
+#endif // BOOST_MATH_HAS_NVRTC

 #endif // BOOST_MATH_ROUND_HPP
diff --git a/include/boost/math/special_functions/sign.hpp b/include/boost/math/special_functions/sign.hpp
index 8f9fc4793a..4f76522654 100644
--- a/include/boost/math/special_functions/sign.hpp
+++ b/include/boost/math/special_functions/sign.hpp
@@ -1,6 +1,7 @@
 //  (C) Copyright John Maddock 2006.
 //  (C) Copyright Johan Rade 2006.
 //  (C) Copyright Paul A. Bristow 2011 (added changesign).
+//  (C) Copyright Matt Borland 2024
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
@@ -13,6 +14,8 @@
 #pragma once
 #endif

+#ifndef __CUDACC_RTC__
+
 #include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/special_functions/detail/fp_traits.hpp>
@@ -25,9 +28,10 @@ namespace detail {

 #ifdef BOOST_MATH_USE_STD_FPCLASSIFY
    template<class T>
-   inline int signbit_impl(T x, native_tag const&)
+   BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, native_tag const&)
    {
-      return (std::signbit)(x) ? 1 : 0;
+      using std::signbit;
+      return (signbit)(x) ? 1 : 0;
    }
 #endif

@@ -35,13 +39,13 @@
    // signed zero or NaN.

    template<class T>
-   inline int signbit_impl(T x, generic_tag<true> const&)
+   BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, generic_tag<true> const&)
    {
       return x < 0;
    }

    template<class T>
-   inline int signbit_impl(T x, generic_tag<false> const&)
+   BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, generic_tag<false> const&)
    {
       return x < 0;
    }
@@ -65,7 +69,7 @@
 #endif

    template<class T>
-   inline int signbit_impl(T x, ieee_copy_all_bits_tag const&)
+   BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, ieee_copy_all_bits_tag const&)
    {
       typedef typename fp_traits<T>::type traits;

@@ -75,7 +79,7 @@
    }

    template<class T>
-   inline int signbit_impl(T x, ieee_copy_leading_bits_tag const&)
+   BOOST_MATH_GPU_ENABLED inline int signbit_impl(T x, ieee_copy_leading_bits_tag const&)
    {
       typedef typename fp_traits<T>::type traits;

@@ -91,13 +95,13 @@
    // signed zero or NaN.

    template<class T>
-   inline T (changesign_impl)(T x, generic_tag<true> const&)
+   BOOST_MATH_GPU_ENABLED inline T (changesign_impl)(T x, generic_tag<true> const&)
    {
       return -x;
    }

    template<class T>
-   inline T (changesign_impl)(T x, generic_tag<false> const&)
+   BOOST_MATH_GPU_ENABLED inline T (changesign_impl)(T x, generic_tag<false> const&)
    {
       return -x;
    }
@@ -124,7 +128,7 @@
 #endif

    template<class T>
-   inline T changesign_impl(T x, ieee_copy_all_bits_tag const&)
+   BOOST_MATH_GPU_ENABLED inline T changesign_impl(T x, ieee_copy_all_bits_tag const&)
    {
       typedef typename fp_traits<T>::sign_change_type traits;

@@ -136,7 +140,7 @@
    }

    template<class T>
-   inline T (changesign_impl)(T x, ieee_copy_leading_bits_tag const&)
+   BOOST_MATH_GPU_ENABLED inline T (changesign_impl)(T x, ieee_copy_leading_bits_tag const&)
    {
       typedef typename fp_traits<T>::sign_change_type traits;

@@ -150,7 +154,8 @@
 } // namespace detail

-template <class T> int (signbit)(T x)
+template <class T>
+BOOST_MATH_GPU_ENABLED int (signbit)(T x)
 {
    typedef typename detail::fp_traits<T>::type traits;
    typedef typename traits::method method;
@@ -160,12 +165,13 @@ template <class T> int (signbit)(T x)
 }

 template <class T>
-inline int sign BOOST_NO_MACRO_EXPAND(const T& z)
+BOOST_MATH_GPU_ENABLED inline int sign BOOST_NO_MACRO_EXPAND(const T& z)
 {
    return (z == 0) ? 0 : (boost::math::signbit)(z) ? -1 : 1;
 }

-template <class T> typename tools::promote_args_permissive<T>::type (changesign)(const T& x)
+template <class T>
+BOOST_MATH_GPU_ENABLED typename tools::promote_args_permissive<T>::type (changesign)(const T& x)
 { //!< \brief return unchanged binary pattern of x, except for change of sign bit.
    typedef typename detail::fp_traits<T>::sign_change_type traits;
    typedef typename traits::method method;
@@ -176,7 +182,7 @@
 }

 template <class T, class U>
-inline typename tools::promote_args_permissive<T, U>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args_permissive<T, U>::type
    copysign BOOST_NO_MACRO_EXPAND(const T& x, const U& y)
 {
    BOOST_MATH_STD_USING
@@ -188,6 +194,47 @@

 } // namespace math
 } // namespace boost

+#else // NVRTC alias versions
+
+#include <boost/math/tools/config.hpp>
+
+namespace boost {
+namespace math {
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED int signbit(T x)
+{
+   return ::signbit(x);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED T changesign(T x)
+{
+   return -x;
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED T copysign(T x, T y)
+{
+   return ::copysign(x, y);
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED float copysign(float x, float y)
+{
+   return ::copysignf(x, y);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED T sign(T z)
+{
+   return (z == 0) ? 0 : ::signbit(z) ? -1 : 1;
+}
+
+} // namespace math
+} // namespace boost
+
+#endif // __CUDACC_RTC__

 #endif // BOOST_MATH_TOOLS_SIGN_HPP
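For reference, the semantics both branches above preserve: sign() is ternary, while signbit() reports only the sign bit and so distinguishes -0.0. A host-side sketch (illustrative, not from the patch):

    #include <boost/math/special_functions/sign.hpp>
    #include <iostream>

    int main()
    {
        std::cout << boost::math::sign(-3.5) << '\n';           // -1
        std::cout << boost::math::sign(0.0) << '\n';            //  0
        std::cout << boost::math::signbit(-0.0) << '\n';        // non-zero: sign bit set
        std::cout << boost::math::copysign(2.0, -1.0) << '\n';  // -2
    }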
diff --git a/include/boost/math/special_functions/sin_pi.hpp b/include/boost/math/special_functions/sin_pi.hpp
index 5b8eb6fcf2..e59e232e6d 100644
--- a/include/boost/math/special_functions/sin_pi.hpp
+++ b/include/boost/math/special_functions/sin_pi.hpp
@@ -1,4 +1,5 @@
 //  Copyright (c) 2007 John Maddock
+//  Copyright (c) 2024 Matt Borland
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,9 +11,14 @@
 #pragma once
 #endif

+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include <boost/math/special_functions/math_fwd.hpp>
 #include <boost/math/special_functions/trunc.hpp>
-#include <boost/math/tools/config.hpp>
+#include <boost/math/tools/numeric_limits.hpp>
+#include <boost/math/tools/promotion.hpp>
 #include <boost/math/constants/constants.hpp>
 #include <boost/math/special_functions/fpclassify.hpp>
 #include <boost/math/policies/error_handling.hpp>
@@ -21,11 +27,9 @@ namespace boost{ namespace math{ namespace detail{

 template <class T, class Policy>
-inline T sin_pi_imp(T x, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline T sin_pi_imp(T x, const Policy&)
 {
    BOOST_MATH_STD_USING // ADL of std names
-   if(x < 0)
-      return -sin_pi_imp(T(-x), pol);
    // sin of pi*x:
    if(x < T(0.5))
       return sin(constants::pi<T>() * x);
@@ -39,7 +43,7 @@ inline T sin_pi_imp(T x, const Policy& pol)
    invert = false;

    T rem = floor(x);
-   if(abs(floor(rem/2)*2 - rem) > std::numeric_limits<T>::epsilon())
+   if(abs(floor(rem/2)*2 - rem) > boost::math::numeric_limits<T>::epsilon())
    {
       invert = !invert;
    }
@@ -53,10 +57,23 @@ inline T sin_pi_imp(T x, const Policy& pol)
    return invert ? T(-rem) : rem;
 }

+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED inline T sin_pi_dispatch(T x, const Policy& pol)
+{
+   if (x < T(0))
+   {
+      return -sin_pi_imp(T(-x), pol);
+   }
+   else
+   {
+      return sin_pi_imp(T(x), pol);
+   }
+}
+
 } // namespace detail

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type sin_pi(T x, const Policy&)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type sin_pi(T x, const Policy&)
 {
    typedef typename tools::promote_args<T>::type result_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
@@ -69,7 +86,7 @@ inline typename tools::promote_args<T>::type sin_pi(T x, const Policy&)
    // We want to ignore overflows since the result is in [-1,1] and the
    // check slows the code down considerably.
       policies::overflow_error<policies::ignore_error> >::type forwarding_policy;
-   return policies::checked_narrowing_cast<result_type, forwarding_policy>(boost::math::detail::sin_pi_imp<value_type>(x, forwarding_policy()), "sin_pi");
+   return policies::checked_narrowing_cast<result_type, forwarding_policy>(boost::math::detail::sin_pi_dispatch<value_type>(x, forwarding_policy()), "sin_pi");
 }

 template <class T>
@@ -80,5 +97,40 @@ inline typename tools::promote_args<T>::type sin_pi(T x)

 } // namespace math
 } // namespace boost

+
+#else // Special handling for NVRTC
+
+namespace boost {
+namespace math {
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED auto sin_pi(T x)
+{
+   return ::sinpi(x);
+}
+
+template <>
+BOOST_MATH_GPU_ENABLED auto sin_pi(float x)
+{
+   return ::sinpif(x);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED auto sin_pi(T x, const Policy&)
+{
+   return ::sinpi(x);
+}
+
+template <typename Policy>
+BOOST_MATH_GPU_ENABLED auto sin_pi(float x, const Policy&)
+{
+   return ::sinpif(x);
+}
+
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_HAS_NVRTC

 #endif
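sin_pi(x) evaluates sin(pi*x) with the argument reduction performed on x itself, which is also why the NVRTC branch can map straight onto ::sinpi/::sinpif. A host-side sketch of the accuracy motivation (illustrative, not from the patch):

    #include <boost/math/special_functions/sin_pi.hpp>
    #include <boost/math/constants/constants.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        // Reduction on x keeps integer arguments exact...
        std::cout << boost::math::sin_pi(1e15) << '\n';  // exactly 0
        // ...whereas forming pi*x first rounds before sin() ever runs.
        std::cout << std::sin(boost::math::constants::pi<double>() * 1e15) << '\n';
    }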
 template <class T>
-inline T sinc_pi_imp(const T x)
+BOOST_MATH_GPU_ENABLED inline T sinc_pi_imp(const T x)
 {
    BOOST_MATH_STD_USING
@@ -44,7 +44,7 @@ namespace boost
    {
       return 0;
    }
-   else if (abs(x) >= 3.3 * tools::forth_root_epsilon<T>())
+   else if (abs(x) >= T(3.3) * tools::forth_root_epsilon<T>())
    {
       return(sin(x)/x);
    }
@@ -58,24 +58,23 @@ namespace boost
 } // namespace detail

 template <class T>
-inline typename tools::promote_args<T>::type sinc_pi(T x)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type sinc_pi(T x)
 {
    typedef typename tools::promote_args<T>::type result_type;
    return detail::sinc_pi_imp(static_cast<result_type>(x));
 }

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type sinc_pi(T x, const Policy&)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type sinc_pi(T x, const Policy&)
 {
    typedef typename tools::promote_args<T>::type result_type;
    return detail::sinc_pi_imp(static_cast<result_type>(x));
 }

 template <class T, template<typename> class U>
-inline U<T> sinc_pi(const U<T> x)
+BOOST_MATH_GPU_ENABLED inline U<T> sinc_pi(const U<T> x)
 {
    BOOST_MATH_STD_USING
-   using ::std::numeric_limits;

    T const    taylor_0_bound = tools::epsilon<T>();
    T const    taylor_2_bound = tools::root_epsilon<T>();
@@ -88,11 +87,11 @@ namespace boost
    else
    {
       // approximation by taylor series in x at 0 up to order 0
-#ifdef __MWERKS__
+      #ifdef __MWERKS__
       U<T> result = static_cast<U<T> >(1);
-#else
+      #else
       U<T> result = U<T>(1);
-#endif
+      #endif

       if (abs(x) >= taylor_0_bound)
       {
@@ -113,7 +112,7 @@ namespace boost
    }

    template <class T, template<typename> class U, class Policy>
-   inline U<T> sinc_pi(const U<T> x, const Policy&)
+   BOOST_MATH_GPU_ENABLED inline U<T> sinc_pi(const U<T> x, const Policy&)
    {
       return sinc_pi(x);
    }
diff --git a/include/boost/math/special_functions/sqrt1pm1.hpp b/include/boost/math/special_functions/sqrt1pm1.hpp
index 041916a53f..4d8aeb38cf 100644
--- a/include/boost/math/special_functions/sqrt1pm1.hpp
+++ b/include/boost/math/special_functions/sqrt1pm1.hpp
@@ -10,6 +10,7 @@
 #pragma once
 #endif

+#include <boost/math/tools/config.hpp>
 #include
 #include
 #include
@@ -21,7 +22,7 @@
 namespace boost{ namespace math{

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type sqrt1pm1(const T& val, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type sqrt1pm1(const T& val, const Policy& pol)
 {
    typedef typename tools::promote_args<T>::type result_type;
    BOOST_MATH_STD_USING
@@ -32,7 +33,7 @@ inline typename tools::promote_args<T>::type sqrt1pm1(const T& val, const Policy& pol)
 }

 template <class T>
-inline typename tools::promote_args<T>::type sqrt1pm1(const T& val)
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type sqrt1pm1(const T& val)
 {
    return sqrt1pm1(val, policies::policy<>());
 }
diff --git a/include/boost/math/special_functions/trigamma.hpp b/include/boost/math/special_functions/trigamma.hpp
index f74b43db1f..61a60b502f 100644
--- a/include/boost/math/special_functions/trigamma.hpp
+++ b/include/boost/math/special_functions/trigamma.hpp
@@ -1,4 +1,5 @@
 //  (C) Copyright John Maddock 2006.
+//  (C) Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0.
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,14 +11,22 @@ #pragma once #endif -#include +#include #include -#include #include +#include +#include +#include #include #include -#include +#include +#include + +#ifndef BOOST_MATH_HAS_NVRTC +#include #include +#include +#endif #if defined(__GNUC__) && defined(BOOST_MATH_USE_FLOAT128) // @@ -33,15 +42,24 @@ namespace boost{ namespace math{ namespace detail{ +// TODO(mborland): Temporary for NVRTC +#ifndef BOOST_MATH_HAS_NVRTC template T polygamma_imp(const int n, T x, const Policy &pol); template -T trigamma_prec(T x, const std::integral_constant*, const Policy&) +T trigamma_prec(T x, const Policy& pol, const boost::math::integral_constant&) +{ + return polygamma_imp(1, x, pol); +} +#endif + +template +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const boost::math::integral_constant&) { // Max error in interpolated form: 3.736e-017 - static const T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469); - static const T P_1_2[] = { + BOOST_MATH_STATIC const T offset = BOOST_MATH_BIG_CONSTANT(T, 53, 2.1093254089355469); + BOOST_MATH_STATIC const T P_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 53, -1.1093280605946045), BOOST_MATH_BIG_CONSTANT(T, 53, -3.8310674472619321), BOOST_MATH_BIG_CONSTANT(T, 53, -3.3703848401898283), @@ -49,7 +67,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 53, 1.6638069578676164), BOOST_MATH_BIG_CONSTANT(T, 53, 0.64468386819102836), }; - static const T Q_1_2[] = { + BOOST_MATH_STATIC const T Q_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 3.4535389668541151), BOOST_MATH_BIG_CONSTANT(T, 53, 4.5208926987851437), @@ -58,7 +76,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 53, -0.20314516859987728e-6), }; // Max error in interpolated form: 1.159e-017 - static const T P_2_4[] = { + BOOST_MATH_STATIC const T P_2_4[] = { BOOST_MATH_BIG_CONSTANT(T, 53, -0.13803835004508849e-7), BOOST_MATH_BIG_CONSTANT(T, 53, 0.50000049158540261), BOOST_MATH_BIG_CONSTANT(T, 53, 1.6077979838469348), @@ -66,7 +84,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 53, 2.0534873203680393), BOOST_MATH_BIG_CONSTANT(T, 53, 0.74566981111565923), }; - static const T Q_2_4[] = { + BOOST_MATH_STATIC const T Q_2_4[] = { BOOST_MATH_BIG_CONSTANT(T, 53, 1.0), BOOST_MATH_BIG_CONSTANT(T, 53, 2.8822787662376169), BOOST_MATH_BIG_CONSTANT(T, 53, 4.1681660554090917), @@ -77,7 +95,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) // Maximum Deviation Found: 6.896e-018 // Expected Error Term : -6.895e-018 // Maximum Relative Change in Control Points : 8.497e-004 - static const T P_4_inf[] = { + BOOST_MATH_STATIC const T P_4_inf[] = { static_cast(0.68947581948701249e-17L), static_cast(0.49999999999998975L), static_cast(1.0177274392923795L), @@ -86,7 +104,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) static_cast(1.5897035272532764L), static_cast(0.40154388356961734L), }; - static const T Q_4_inf[] = { + BOOST_MATH_STATIC const T Q_4_inf[] = { static_cast(1.0L), static_cast(1.7021215452463932L), static_cast(4.4290431747556469L), @@ -110,11 +128,11 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) } template -T trigamma_prec(T x, const std::integral_constant*, const Policy&) +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const 
boost::math::integral_constant&) { // Max error in interpolated form: 1.178e-020 - static const T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875); - static const T P_1_2[] = { + BOOST_MATH_STATIC const T offset_1_2 = BOOST_MATH_BIG_CONSTANT(T, 64, 2.109325408935546875); + BOOST_MATH_STATIC const T P_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -1.10932535608960258341), BOOST_MATH_BIG_CONSTANT(T, 64, -4.18793841543017129052), BOOST_MATH_BIG_CONSTANT(T, 64, -4.63865531898487734531), @@ -123,7 +141,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 64, 1.21172611429185622377), BOOST_MATH_BIG_CONSTANT(T, 64, 0.259635673503366427284), }; - static const T Q_1_2[] = { + BOOST_MATH_STATIC const T Q_1_2[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 3.77521119359546982995), BOOST_MATH_BIG_CONSTANT(T, 64, 5.664338024578956321), @@ -133,7 +151,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 64, 0.629642219810618032207e-8), }; // Max error in interpolated form: 3.912e-020 - static const T P_2_8[] = { + BOOST_MATH_STATIC const T P_2_8[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.387540035162952880976e-11), BOOST_MATH_BIG_CONSTANT(T, 64, 0.500000000276430504), BOOST_MATH_BIG_CONSTANT(T, 64, 3.21926880986360957306), @@ -143,7 +161,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 64, 13.4346512182925923978), BOOST_MATH_BIG_CONSTANT(T, 64, 3.98656291026448279118), }; - static const T Q_2_8[] = { + BOOST_MATH_STATIC const T Q_2_8[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, 6.10520430478613667724), BOOST_MATH_BIG_CONSTANT(T, 64, 18.475001060603645512), @@ -156,7 +174,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) // Maximum Deviation Found: 2.635e-020 // Expected Error Term : 2.635e-020 // Maximum Relative Change in Control Points : 1.791e-003 - static const T P_8_inf[] = { + BOOST_MATH_STATIC const T P_8_inf[] = { BOOST_MATH_BIG_CONSTANT(T, 64, -0.263527875092466899848e-19), BOOST_MATH_BIG_CONSTANT(T, 64, 0.500000000000000058145), BOOST_MATH_BIG_CONSTANT(T, 64, 0.0730121433777364138677), @@ -164,7 +182,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) BOOST_MATH_BIG_CONSTANT(T, 64, 0.0517092358874932620529), BOOST_MATH_BIG_CONSTANT(T, 64, 1.07995383547483921121), }; - static const T Q_8_inf[] = { + BOOST_MATH_STATIC const T Q_8_inf[] = { BOOST_MATH_BIG_CONSTANT(T, 64, 1.0), BOOST_MATH_BIG_CONSTANT(T, 64, -0.187309046577818095504), BOOST_MATH_BIG_CONSTANT(T, 64, 3.95255391645238842975), @@ -188,7 +206,7 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) } template -T trigamma_prec(T x, const std::integral_constant*, const Policy&) +BOOST_MATH_GPU_ENABLED T trigamma_prec(T x, const Policy&, const boost::math::integral_constant&) { // Max error in interpolated form: 1.916e-035 @@ -356,8 +374,8 @@ T trigamma_prec(T x, const std::integral_constant*, const Policy&) return (1 + tools::evaluate_polynomial(P_16_inf, y) / tools::evaluate_polynomial(Q_16_inf, y)) / x; } -template -T trigamma_imp(T x, const Tag* t, const Policy& pol) +template +BOOST_MATH_GPU_ENABLED T trigamma_dispatch(T x, const Policy& pol, const Tag& tag) { // // This handles reflection of negative arguments, and all our @@ -373,27 +391,29 @@ T trigamma_imp(T x, const Tag* t, const Policy& pol) { // Reflect: T z = 1 - x; + + if(z < 1) + { + result = 1 / (z * z); + z += 1; 
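      // Context for the reflection step that follows: for non-positive x the
      // dispatch applies the identity
      //    trigamma(x) = pi^2 / sin^2(pi * x) - trigamma(1 - x)
      // with z = 1 - x, using sin_pi() so that sin(pi * x) is computed from
      // an accurately reduced argument. The previous implementation recursed
      // into trigamma_imp() at this point; SYCL device code may not recurse
      // (see the note next to BOOST_MATH_GPU_ENABLED in tools/config.hpp), so
      // the shift
      //    trigamma(z) = trigamma(z + 1) + 1 / z^2
      // is applied inline by this z < 1 branch before trigamma_prec() is
      // called on the shifted argument.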
+      }
+
       // Argument reduction for tan:
       if(floor(x) == x)
       {
          return policies::raise_pole_error<T>("boost::math::trigamma<%1%>(%1%)", nullptr, (1-x), pol);
       }
       T s = fabs(x) < fabs(z) ? boost::math::sin_pi(x, pol) : boost::math::sin_pi(z, pol);
-      return -trigamma_imp(z, t, pol) + boost::math::pow<2>(constants::pi<T>()) / (s * s);
+      return result - trigamma_prec(T(z), pol, tag) + boost::math::pow<2>(constants::pi<T>()) / (s * s);
    }
    if(x < 1)
    {
       result = 1 / (x * x);
       x += 1;
    }
-   return result + trigamma_prec(x, t, pol);
+   return result + trigamma_prec(x, pol, tag);
 }

-template <class T, class Policy>
-T trigamma_imp(T x, const std::integral_constant<int, 0>*, const Policy& pol)
-{
-   return polygamma_imp(1, x, pol);
-}
 //
 // Initializer: ensure all our constants are initialized prior to the first call of main:
 //
@@ -402,22 +422,24 @@ struct trigamma_initializer
 {
    struct init
    {
-      init()
+      BOOST_MATH_GPU_ENABLED init()
       {
          typedef typename policies::precision<T, Policy>::type precision_type;
-         do_init(std::integral_constant<bool, precision_type::value && (precision_type::value <= 113)>());
+         do_init(boost::math::integral_constant<bool, precision_type::value && (precision_type::value <= 113)>());
       }
-      void do_init(const std::true_type&)
+      BOOST_MATH_GPU_ENABLED void do_init(const boost::math::true_type&)
       {
          boost::math::trigamma(T(2.5), Policy());
       }
-      void do_init(const std::false_type&){}
-      void force_instantiate()const{}
+      BOOST_MATH_GPU_ENABLED void do_init(const boost::math::false_type&){}
+      BOOST_MATH_GPU_ENABLED void force_instantiate()const{}
    };
    static const init initializer;
-   static void force_instantiate()
+   BOOST_MATH_GPU_ENABLED static void force_instantiate()
    {
+      #ifndef BOOST_MATH_HAS_GPU_SUPPORT
       initializer.force_instantiate();
+      #endif
    }
 };

@@ -427,13 +449,13 @@ const typename trigamma_initializer<T, Policy>::init trigamma_initializer<T, Policy>::initializer;

 template <class T, class Policy>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    trigamma(T x, const Policy&)
 {
    typedef typename tools::promote_args<T>::type result_type;
    typedef typename policies::evaluation<result_type, Policy>::type value_type;
    typedef typename policies::precision<T, Policy>::type precision_type;
-   typedef std::integral_constant<int,
+   typedef boost::math::integral_constant<int,
       precision_type::value <= 0 ? 0 :
      precision_type::value <= 53 ? 53 :
      precision_type::value <= 64 ? 64 :
      precision_type::value <= 113 ? 113 : 0
   > tag_type;
   typedef typename policies::normalise<
      Policy,
      policies::promote_float<false>,
      policies::promote_double<false>,
      policies::discrete_quantile<>,
      policies::assert_undefined<> >::type forwarding_policy;
   // Force initialization of constants:
   detail::trigamma_initializer<value_type, forwarding_policy>::force_instantiate();
-   return policies::checked_narrowing_cast<result_type, forwarding_policy>(detail::trigamma_imp(
+   return policies::checked_narrowing_cast<result_type, forwarding_policy>(detail::trigamma_dispatch(
      static_cast<value_type>(x),
-      static_cast<const tag_type*>(nullptr), forwarding_policy()), "boost::math::trigamma<%1%>(%1%)");
+      forwarding_policy(),
+      tag_type()), "boost::math::trigamma<%1%>(%1%)");
 }

 template <class T>
-inline typename tools::promote_args<T>::type
+BOOST_MATH_GPU_ENABLED inline typename tools::promote_args<T>::type
    trigamma(T x)
 {
    return trigamma(x, policies::policy<>());
diff --git a/include/boost/math/special_functions/trunc.hpp b/include/boost/math/special_functions/trunc.hpp
index a084de560b..b52f4f321c 100644
--- a/include/boost/math/special_functions/trunc.hpp
+++ b/include/boost/math/special_functions/trunc.hpp
@@ -11,9 +11,14 @@
 #pragma once
 #endif

+#include
+#include
+#include
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #include
 #include
-#include
 #include
 #include
 #include
@@ -27,7 +32,7 @@ namespace boost{ namespace math{ namespace detail{

 template <class T, class Policy>
-inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol, const std::false_type&)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol, const std::false_type&)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
@@ -39,23 +44,66 @@ inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol, const std::false_type&)
 }

 template <class T, class Policy>
-inline tools::promote_args_t<T> trunc(const T& v, const Policy&, const std::true_type&)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> trunc(const T& v, const Policy&, const std::true_type&)
 {
    return v;
 }

-}
+} // Namespace detail

 template <class T, class Policy>
-inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> trunc(const T& v, const Policy& pol)
 {
    return detail::trunc(v, pol, std::integral_constant<bool, detail::is_integer_for_rounding<T>::value>());
 }

+
 template <class T>
-inline tools::promote_args_t<T> trunc(const T& v)
+BOOST_MATH_GPU_ENABLED inline tools::promote_args_t<T> trunc(const T& v)
 {
    return trunc(v, policies::policy<>());
 }
+
+#else // Special handling for nvrtc
+
+namespace boost {
+namespace math {
+
+namespace detail {
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED double trunc_impl(T x)
+{
+   return static_cast<double>(x);
+}
+
+BOOST_MATH_GPU_ENABLED inline float trunc_impl(float x)
+{
+   return ::truncf(x);
+}
+
+BOOST_MATH_GPU_ENABLED inline double trunc_impl(double x)
+{
+   return ::trunc(x);
+}
+
+} // Namespace detail
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED auto trunc(T x, const Policy&)
+{
+   return detail::trunc_impl(x);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED auto trunc(T x)
+{
+   return detail::trunc_impl(x);
+}
+
+#endif
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
 //
 // The following functions will not compile unless T has an
 // implicit conversion to the integer types.  For user-defined
@@ -70,13 +118,13 @@ inline tools::promote_args_t<T> trunc(const T& v)
 // https://stackoverflow.com/questions/27442885/syntax-error-with-stdnumeric-limitsmax
 //
 template <class T, class Policy>
-inline int itrunc(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline int itrunc(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
    result_type r = boost::math::trunc(v, pol);

-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<result_type>
                  #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<BOOST_MATH_FLOAT128_TYPE, result_type>
                  #endif
@@ -100,7 +148,7 @@ inline int itrunc(const T& v, const Policy& pol)
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<int>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<int>::digits);

    if (r >= max_val || r < -max_val)
    {
@@ -110,20 +158,21 @@ inline int itrunc(const T& v, const Policy& pol)
    return static_cast<int>(r);
 }

+
 template <class T>
-inline int itrunc(const T& v)
+BOOST_MATH_GPU_ENABLED inline int itrunc(const T& v)
 {
    return itrunc(v, policies::policy<>());
 }

 template <class T, class Policy>
-inline long ltrunc(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long ltrunc(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
    result_type r = boost::math::trunc(v, pol);

-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<result_type>
                  #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<BOOST_MATH_FLOAT128_TYPE, result_type>
                  #endif
@@ -147,7 +196,7 @@ inline long ltrunc(const T& v, const Policy& pol)
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long>::digits);

    if (r >= max_val || r < -max_val)
    {
@@ -157,20 +206,21 @@ inline long ltrunc(const T& v, const Policy& pol)
    return static_cast<long>(r);
 }

+
 template <class T>
-inline long ltrunc(const T& v)
+BOOST_MATH_GPU_ENABLED inline long ltrunc(const T& v)
 {
    return ltrunc(v, policies::policy<>());
 }

 template <class T, class Policy>
-inline long long lltrunc(const T& v, const Policy& pol)
+BOOST_MATH_GPU_ENABLED inline long long lltrunc(const T& v, const Policy& pol)
 {
    BOOST_MATH_STD_USING
    using result_type = tools::promote_args_t<T>;
    result_type r = boost::math::trunc(v, pol);

-   #ifdef BOOST_MATH_HAS_CONSTEXPR_LDEXP
+   #if defined(BOOST_MATH_HAS_CONSTEXPR_LDEXP) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
    if constexpr (std::is_arithmetic_v<result_type>
                  #ifdef BOOST_MATH_FLOAT128_TYPE
                  && !std::is_same_v<BOOST_MATH_FLOAT128_TYPE, result_type>
                  #endif
@@ -194,7 +244,7 @@ inline long long lltrunc(const T& v, const Policy& pol)
       }
    }
    #else
-   static const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long long>::digits);
+   BOOST_MATH_STATIC_LOCAL_VARIABLE const result_type max_val = ldexp(static_cast<result_type>(1), std::numeric_limits<long long>::digits);

    if (r >= max_val || r < -max_val)
    {
@@ -204,21 +254,81 @@ inline long long lltrunc(const T& v, const Policy& pol)
    return static_cast<long long>(r);
 }

+
 template <class T>
-inline long long lltrunc(const T& v)
+BOOST_MATH_GPU_ENABLED inline long long lltrunc(const T& v)
 {
    return lltrunc(v, policies::policy<>());
 }

+#else // Reduced impl specifically for NVRTC platform
+
+namespace detail {
+
+template <typename TargetType, typename T>
+BOOST_MATH_GPU_ENABLED TargetType integer_trunc_impl(T v)
+{
+   double r = boost::math::trunc(v);
+
+   const double max_val = ldexp(1.0, boost::math::numeric_limits<TargetType>::digits);
+
+   if (r >= max_val || r < -max_val)
+   {
+      r = 0;
+   }
+
+   return static_cast<TargetType>(r);
+}
+
+} // Namespace detail
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED int itrunc(T v)
+{
+   return detail::integer_trunc_impl<int>(v);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED int itrunc(T v, const Policy&)
+{
+   return detail::integer_trunc_impl<int>(v);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED long ltrunc(T v)
+{
+   return detail::integer_trunc_impl<long>(v);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED long ltrunc(T v, const Policy&)
+{
+   return detail::integer_trunc_impl<long>(v);
+}
+
+template <typename T>
+BOOST_MATH_GPU_ENABLED long long lltrunc(T v)
+{
+   return detail::integer_trunc_impl<long long>(v);
+}
+
+template <typename T, typename Policy>
+BOOST_MATH_GPU_ENABLED long long lltrunc(T v, const Policy&)
+{
+   return detail::integer_trunc_impl<long long>(v);
+}
+
+#endif // BOOST_MATH_HAS_NVRTC
+
 template <class T, class Policy>
-inline typename std::enable_if<std::is_convertible<T, int>::value, int>::type
+BOOST_MATH_GPU_ENABLED inline boost::math::enable_if_t<boost::math::is_convertible_v<T, int>, int>
    iconvert(const T& v, const Policy&)
 {
    return static_cast<int>(v);
 }

 template <class T, class Policy>
-inline typename std::enable_if<!std::is_convertible<T, int>::value, int>::type
+BOOST_MATH_GPU_ENABLED inline boost::math::enable_if_t<!boost::math::is_convertible_v<T, int>, int>
    iconvert(const T& v, const Policy& pol)
 {
    using boost::math::itrunc;
@@ -226,14 +336,14 @@ inline typename std::enable_if<!std::is_convertible<T, int>::value, int>::type
 }

 template <class T, class Policy>
-inline typename std::enable_if<std::is_convertible<T, long>::value, long>::type
+BOOST_MATH_GPU_ENABLED inline boost::math::enable_if_t<boost::math::is_convertible_v<T, long>, long>
    lconvert(const T& v, const Policy&)
 {
    return static_cast<long>(v);
 }

 template <class T, class Policy>
-inline typename std::enable_if<!std::is_convertible<T, long>::value, long>::type
+BOOST_MATH_GPU_ENABLED inline boost::math::enable_if_t<!boost::math::is_convertible_v<T, long>, long>
    lconvert(const T& v, const Policy& pol)
 {
    using boost::math::ltrunc;
@@ -241,14 +351,29 @@ inline typename std::enable_if<!std::is_convertible<T, long>::value, long>::type
 }

 template <class T, class Policy>
-inline typename std::enable_if<std::is_convertible<T, long long>::value, long long>::type
+BOOST_MATH_GPU_ENABLED inline boost::math::enable_if_t<boost::math::is_convertible_v<T, long long>, long long>
+   llconvert(const T& v, const Policy&)
+{
+   return static_cast<long long>(v);
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED inline typename boost::math::enable_if_t<!boost::math::is_convertible_v<T, long long>, long long>
+   llconvert(const T& v, const Policy& pol)
+{
+   using boost::math::lltrunc;
+   return lltrunc(v, pol);
+}
+
+template <class T, class Policy>
+BOOST_MATH_GPU_ENABLED [[deprecated("Use llconvert")]] inline boost::math::enable_if_t<boost::math::is_convertible_v<T, long long>, long long>
    llconvertert(const T& v, const Policy&)
 {
    return static_cast<long long>(v);
 }

 template <class T, class Policy>
-inline typename std::enable_if<!std::is_convertible<T, long long>::value, long long>::type
+BOOST_MATH_GPU_ENABLED [[deprecated("Use llconvert")]] inline typename boost::math::enable_if_t<!boost::math::is_convertible_v<T, long long>, long long>
    llconvertert(const T& v, const Policy& pol)
 {
    using boost::math::lltrunc;
diff --git a/include/boost/math/special_functions/ulp.hpp b/include/boost/math/special_functions/ulp.hpp
index 3c0616db0e..5d1617aced 100644
--- a/include/boost/math/special_functions/ulp.hpp
+++ b/include/boost/math/special_functions/ulp.hpp
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include

 namespace boost{ namespace math{ namespace detail{
diff --git a/include/boost/math/tools/array.hpp b/include/boost/math/tools/array.hpp
new file mode 100644
index 0000000000..23e666673c
--- /dev/null
+++ b/include/boost/math/tools/array.hpp
@@ -0,0 +1,41 @@
+//  Copyright (c) 2024 Matt Borland
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+//  Regular use of std::array functions can not be used on
+//  GPU platforms like CUDA since they are missing the __device__ marker
+//  Alias as needed to get correct support
+
+#ifndef BOOST_MATH_TOOLS_ARRAY_HPP
+#define BOOST_MATH_TOOLS_ARRAY_HPP
+
+#include <boost/math/tools/config.hpp>
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+
+#include <cuda/std/array>
+
+namespace boost {
+namespace math {
+
+using cuda::std::array;
+
+} // namespace math
+} // namespace boost
+
+#else
+
+#include <array>
+
+namespace boost {
+namespace math {
+
+using std::array;
+
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_ENABLE_CUDA
+
+#endif // BOOST_MATH_TOOLS_ARRAY_HPP
diff --git a/include/boost/math/tools/assert.hpp b/include/boost/math/tools/assert.hpp
index 3d5655923a..3f57351fc1 100644
--- a/include/boost/math/tools/assert.hpp
+++ b/include/boost/math/tools/assert.hpp
@@ -10,6 +10,19 @@
 #ifndef BOOST_MATH_TOOLS_ASSERT_HPP
 #define BOOST_MATH_TOOLS_ASSERT_HPP

+#include <boost/math/tools/config.hpp>
+
+#ifdef BOOST_MATH_HAS_GPU_SUPPORT
+
+// Run time asserts are generally unsupported
+
+#define BOOST_MATH_ASSERT(expr)
+#define BOOST_MATH_ASSERT_MSG(expr, msg)
+#define BOOST_MATH_STATIC_ASSERT(expr) static_assert(expr, #expr " failed")
+#define BOOST_MATH_STATIC_ASSERT_MSG(expr, msg) static_assert(expr, msg)
+
+#else
+
 #include <cassert>

 #ifndef BOOST_MATH_STANDALONE
@@ -29,6 +42,8 @@
 #define BOOST_MATH_STATIC_ASSERT(expr) static_assert(expr, #expr " failed")
 #define BOOST_MATH_STATIC_ASSERT_MSG(expr, msg) static_assert(expr, msg)

-#endif
+#endif // Is standalone
+
+#endif // BOOST_MATH_HAS_GPU_SUPPORT

 #endif // BOOST_MATH_TOOLS_ASSERT_HPP
diff --git a/include/boost/math/tools/big_constant.hpp b/include/boost/math/tools/big_constant.hpp
index eaa34dd230..0d54976bc4 100644
--- a/include/boost/math/tools/big_constant.hpp
+++ b/include/boost/math/tools/big_constant.hpp
@@ -8,6 +8,12 @@
 #define BOOST_MATH_TOOLS_BIG_CONSTANT_HPP

 #include <boost/math/tools/config.hpp>
+
+// On NVRTC we don't need any of this
+// We just have a simple definition of the macro since the largest float
+// type on the platform is a 64-bit double
+#ifndef BOOST_MATH_HAS_NVRTC
+
 #ifndef BOOST_MATH_STANDALONE
 #include
 #endif
@@ -43,12 +49,12 @@ typedef double largest_float;
 #endif

 template <class T>
-inline constexpr T make_big_value(largest_float v, const char*, std::true_type const&, std::false_type const&) BOOST_MATH_NOEXCEPT(T)
+BOOST_MATH_GPU_ENABLED constexpr T make_big_value(largest_float v, const char*, std::true_type const&, std::false_type const&) BOOST_MATH_NOEXCEPT(T)
 {
    return static_cast<T>(v);
 }
 template <class T>
-inline constexpr T make_big_value(largest_float v,
const char*, std::true_type const&, std::true_type const&) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED constexpr T make_big_value(largest_float v, const char*, std::true_type const&, std::true_type const&) BOOST_MATH_NOEXCEPT(T) { return static_cast(v); } @@ -94,5 +100,7 @@ inline constexpr T make_big_value(largest_float, const char* s, std::false_type }}} // namespaces +#endif // BOOST_MATH_HAS_NVRTC + #endif diff --git a/include/boost/math/tools/complex.hpp b/include/boost/math/tools/complex.hpp index d462ca8092..ec51440116 100644 --- a/include/boost/math/tools/complex.hpp +++ b/include/boost/math/tools/complex.hpp @@ -10,9 +10,39 @@ #ifndef BOOST_MATH_TOOLS_COMPLEX_HPP #define BOOST_MATH_TOOLS_COMPLEX_HPP -#include +#include #include +#ifdef BOOST_MATH_ENABLE_CUDA + +#include +#include + +namespace boost { +namespace math { + +template +using complex = cuda::std::complex; + +} // namespace math +} // namespace boost + +#else + +#include +#include + +namespace boost { +namespace math { + +template +using complex = std::complex; + +} // namespace math +} // namespace boost + +#endif + namespace boost { namespace math { namespace tools { @@ -24,12 +54,21 @@ namespace boost { static constexpr bool value = false; }; + #ifndef BOOST_MATH_ENABLE_CUDA template struct is_complex_type_impl().real()), decltype(std::declval().imag())>> { static constexpr bool value = true; }; + #else + template + struct is_complex_type_impl().real()), + decltype(cuda::std::declval().imag())>> + { + static constexpr bool value = true; + }; + #endif } // Namespace detail template diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp index 6d962a08a6..12f3411c2f 100644 --- a/include/boost/math/tools/config.hpp +++ b/include/boost/math/tools/config.hpp @@ -11,6 +11,8 @@ #pragma once #endif +#ifndef __CUDACC_RTC__ + #include // Minimum language standard transition @@ -218,12 +220,16 @@ #include -#if (defined(__NetBSD__) || defined(__EMSCRIPTEN__)\ +#if (defined(__NetBSD__)\ || (defined(__hppa) && !defined(__OpenBSD__)) || (defined(__NO_LONG_DOUBLE_MATH) && (DBL_MANT_DIG != LDBL_MANT_DIG))) \ && !defined(BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS) //# define BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS #endif +#if defined(__EMSCRIPTEN__) && !defined(BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS) +# define BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS +#endif + #ifdef __IBMCPP__ // // For reasons I don't understand, the tests with IMB's compiler all @@ -463,7 +469,7 @@ struct non_type {}; #if defined(BOOST_MATH_STANDALONE) && defined(_GLIBCXX_USE_FLOAT128) && defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) && !defined(__STRICT_ANSI__) \ && !defined(BOOST_MATH_DISABLE_FLOAT128) && !defined(BOOST_MATH_USE_FLOAT128) # define BOOST_MATH_USE_FLOAT128 -#elif defined(BOOST_HAS_FLOAT128) && !defined(BOOST_MATH_USE_FLOAT128) +#elif defined(BOOST_HAS_FLOAT128) && !defined(BOOST_MATH_USE_FLOAT128) && !defined(BOOST_MATH_DISABLE_FLOAT128) # define BOOST_MATH_USE_FLOAT128 #endif #ifdef BOOST_MATH_USE_FLOAT128 @@ -522,7 +528,9 @@ struct non_type {}; using std::ceil;\ using std::floor;\ using std::log10;\ - using std::sqrt; + using std::sqrt;\ + using std::log2;\ + using std::ilogb; #define BOOST_MATH_STD_USING BOOST_MATH_STD_USING_CORE @@ -660,6 +668,184 @@ namespace boost{ namespace math{ #define BOOST_MATH_CONSTEXPR_TABLE_FUNCTION #endif +// +// CUDA support: +// + +#ifdef __CUDACC__ + +// We have to get our include order correct otherwise you get compilation failures +#include 
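// What the markup in this section buys: under nvcc, BOOST_MATH_GPU_ENABLED
// expands to __host__ __device__, so the decorated functions become callable
// from device code. A minimal usage sketch (hypothetical kernel, not part of
// this patch; assumes a CUDA build with this config header active):
//
//    #include <boost/math/special_functions/trigamma.hpp>
//
//    __global__ void trigamma_kernel(const double* in, double* out, int n)
//    {
//       const int i = blockIdx.x * blockDim.x + threadIdx.x;
//       if (i < n)
//       {
//          out[i] = boost::math::trigamma(in[i]);
//       }
//    }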
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+# define BOOST_MATH_CUDA_ENABLED __host__ __device__
+# define BOOST_MATH_HAS_GPU_SUPPORT
+
+# ifndef BOOST_MATH_ENABLE_CUDA
+#   define BOOST_MATH_ENABLE_CUDA
+# endif
+
+// Device code can not handle exceptions
+# ifndef BOOST_MATH_NO_EXCEPTIONS
+#   define BOOST_MATH_NO_EXCEPTIONS
+# endif
+
+// We want to use force inline from CUDA instead of the host compiler
+# undef BOOST_MATH_FORCEINLINE
+# define BOOST_MATH_FORCEINLINE __forceinline__
+
+#elif defined(SYCL_LANGUAGE_VERSION)
+
+# define BOOST_MATH_SYCL_ENABLED SYCL_EXTERNAL
+# define BOOST_MATH_HAS_GPU_SUPPORT
+
+# ifndef BOOST_MATH_ENABLE_SYCL
+#   define BOOST_MATH_ENABLE_SYCL
+# endif
+
+# ifndef BOOST_MATH_NO_EXCEPTIONS
+#   define BOOST_MATH_NO_EXCEPTIONS
+# endif
+
+// spir64 does not support long double
+# define BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+# define BOOST_MATH_NO_REAL_CONCEPT_TESTS
+
+# undef BOOST_MATH_FORCEINLINE
+# define BOOST_MATH_FORCEINLINE inline
+
+#endif
+
+#ifndef BOOST_MATH_CUDA_ENABLED
+#  define BOOST_MATH_CUDA_ENABLED
+#endif
+
+#ifndef BOOST_MATH_SYCL_ENABLED
+#  define BOOST_MATH_SYCL_ENABLED
+#endif
+
+// Not all functions that allow CUDA allow SYCL (e.g. Recursion is disallowed by SYCL)
+# define BOOST_MATH_GPU_ENABLED BOOST_MATH_CUDA_ENABLED BOOST_MATH_SYCL_ENABLED
+
+// Additional functions that need replaced/marked up
+#ifdef BOOST_MATH_HAS_GPU_SUPPORT
+template <class T>
+BOOST_MATH_GPU_ENABLED constexpr void gpu_safe_swap(T& a, T& b) { T t(a); a = b; b = t; }
+template <class T>
+BOOST_MATH_GPU_ENABLED constexpr T gpu_safe_min(const T& a, const T& b) { return a < b ? a : b; }
+template <class T>
+BOOST_MATH_GPU_ENABLED constexpr T gpu_safe_max(const T& a, const T& b) { return a > b ? a : b; }
+
+#define BOOST_MATH_GPU_SAFE_SWAP(a, b) gpu_safe_swap(a, b)
+#define BOOST_MATH_GPU_SAFE_MIN(a, b) gpu_safe_min(a, b)
+#define BOOST_MATH_GPU_SAFE_MAX(a, b) gpu_safe_max(a, b)
+
+#else
+
+#define BOOST_MATH_GPU_SAFE_SWAP(a, b) std::swap(a, b)
+#define BOOST_MATH_GPU_SAFE_MIN(a, b) (std::min)(a, b)
+#define BOOST_MATH_GPU_SAFE_MAX(a, b) (std::max)(a, b)
+
+#endif
+
+// Static variables are not allowed with CUDA or C++20 modules
+// See if we can inline them instead
+
+#if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L
+#  define BOOST_MATH_INLINE_CONSTEXPR inline constexpr
+#  define BOOST_MATH_STATIC static
+#  ifndef BOOST_MATH_HAS_GPU_SUPPORT
+#    define BOOST_MATH_STATIC_LOCAL_VARIABLE static
+#  else
+#    define BOOST_MATH_STATIC_LOCAL_VARIABLE
+#  endif
+#else
+#  ifndef BOOST_MATH_HAS_GPU_SUPPORT
+#    define BOOST_MATH_INLINE_CONSTEXPR static constexpr
+#    define BOOST_MATH_STATIC static
+#    define BOOST_MATH_STATIC_LOCAL_VARIABLE
+#  else
+#    define BOOST_MATH_INLINE_CONSTEXPR constexpr
+#    define BOOST_MATH_STATIC constexpr
+#    define BOOST_MATH_STATIC_LOCAL_VARIABLE static
+#  endif
+#endif
+
+#define BOOST_MATH_FP_NAN FP_NAN
+#define BOOST_MATH_FP_INFINITE FP_INFINITE
+#define BOOST_MATH_FP_ZERO FP_ZERO
+#define BOOST_MATH_FP_SUBNORMAL FP_SUBNORMAL
+#define BOOST_MATH_FP_NORMAL FP_NORMAL
+
+#else // Special section for CUDA NVRTC to ensure we consume no STL headers
+
+#ifndef BOOST_MATH_STANDALONE
+#  define BOOST_MATH_STANDALONE
+#endif
+
+#define BOOST_MATH_HAS_NVRTC
+#define BOOST_MATH_ENABLE_CUDA
+#define BOOST_MATH_HAS_GPU_SUPPORT
+
+#define BOOST_MATH_GPU_ENABLED __host__ __device__
+#define BOOST_MATH_CUDA_ENABLED __host__ __device__
+
+#define BOOST_MATH_STATIC static
+#define BOOST_MATH_STATIC_LOCAL_VARIABLE
+
+#define BOOST_MATH_NOEXCEPT(T) noexcept(boost::math::is_floating_point_v<T>)
+#define BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)
+#define BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)
+#define BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE_SPEC(T)
+#define BOOST_MATH_BIG_CONSTANT(T, N, V) static_cast<T>(V)
+#define BOOST_MATH_FORCEINLINE __forceinline__
+#define BOOST_MATH_STD_USING
+#define BOOST_MATH_IF_CONSTEXPR if
+#define BOOST_MATH_IS_FLOAT(T) (boost::math::is_floating_point<T>::value)
+#define BOOST_MATH_CONSTEXPR_TABLE_FUNCTION constexpr
+#define BOOST_MATH_NO_EXCEPTIONS
+#define BOOST_MATH_PREVENT_MACRO_SUBSTITUTION
+
+// This should be defined to nothing but since it is not specifically a math macro
+// we need to undef before proceeding
+#ifdef BOOST_FPU_EXCEPTION_GUARD
+#  undef BOOST_FPU_EXCEPTION_GUARD
+#endif
+
+#define BOOST_FPU_EXCEPTION_GUARD
+
+template <class T>
+BOOST_MATH_GPU_ENABLED constexpr void gpu_safe_swap(T& a, T& b) { T t(a); a = b; b = t; }
+
+#define BOOST_MATH_GPU_SAFE_SWAP(a, b) gpu_safe_swap(a, b)
+#define BOOST_MATH_GPU_SAFE_MIN(a, b) (::min)(a, b)
+#define BOOST_MATH_GPU_SAFE_MAX(a, b) (::max)(a, b)
+
+#define BOOST_MATH_FP_NAN 0
+#define BOOST_MATH_FP_INFINITE 1
+#define BOOST_MATH_FP_ZERO 2
+#define BOOST_MATH_FP_SUBNORMAL 3
+#define BOOST_MATH_FP_NORMAL 4
+
+#define BOOST_MATH_INT_VALUE_SUFFIX(RV, SUF) RV##SUF
+#define BOOST_MATH_INT_TABLE_TYPE(RT, IT) IT
+
+#if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L
+#  define BOOST_MATH_INLINE_CONSTEXPR inline constexpr
+#else
+#  define BOOST_MATH_INLINE_CONSTEXPR constexpr
+#endif
+
+#define BOOST_MATH_INSTRUMENT_VARIABLE(x)
+#define BOOST_MATH_INSTRUMENT_CODE(x)
+
+#endif // NVRTC

 #endif // BOOST_MATH_TOOLS_CONFIG_HPP
diff --git a/include/boost/math/tools/cstdint.hpp b/include/boost/math/tools/cstdint.hpp
new file mode 100644
index 0000000000..ce2c913b5c
--- /dev/null
+++ b/include/boost/math/tools/cstdint.hpp
@@ -0,0 +1,107 @@
+// Copyright (c) 2024 Matt Borland
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_MATH_TOOLS_CSTDINT
+#define BOOST_MATH_TOOLS_CSTDINT
+
+#include <boost/math/tools/config.hpp>
+
+
+#ifdef BOOST_MATH_ENABLE_CUDA
+
+#include <cuda/std/cstdint>
+
+namespace boost {
+namespace math {
+
+using cuda::std::int8_t;
+using cuda::std::int16_t;
+using cuda::std::int32_t;
+using cuda::std::int64_t;
+
+using cuda::std::int_fast8_t;
+using cuda::std::int_fast16_t;
+using cuda::std::int_fast32_t;
+using cuda::std::int_fast64_t;
+
+using cuda::std::int_least8_t;
+using cuda::std::int_least16_t;
+using cuda::std::int_least32_t;
+using cuda::std::int_least64_t;
+
+using cuda::std::intmax_t;
+using cuda::std::intptr_t;
+
+using cuda::std::uint8_t;
+using cuda::std::uint16_t;
+using cuda::std::uint32_t;
+using cuda::std::uint64_t;
+
+using cuda::std::uint_fast8_t;
+using cuda::std::uint_fast16_t;
+using cuda::std::uint_fast32_t;
+using cuda::std::uint_fast64_t;
+
+using cuda::std::uint_least8_t;
+using cuda::std::uint_least16_t;
+using cuda::std::uint_least32_t;
+using cuda::std::uint_least64_t;
+
+using cuda::std::uintmax_t;
+using cuda::std::uintptr_t;
+
+using size_t = unsigned long;
+
+#else
+
+#include <cstdint>
+
+namespace boost {
+namespace math {
+
+using std::int8_t;
+using std::int16_t;
+using std::int32_t;
+using std::int64_t;
+
+using std::int_fast8_t;
+using std::int_fast16_t;
+using std::int_fast32_t;
+using std::int_fast64_t;
+
+using std::int_least8_t;
+using std::int_least16_t;
+using std::int_least32_t;
+using std::int_least64_t;
+
+using std::intmax_t;
+using std::intptr_t;
+
+using std::uint8_t;
+using std::uint16_t;
+using std::uint32_t;
+using std::uint64_t;
+
+using std::uint_fast8_t;
+using std::uint_fast16_t;
+using std::uint_fast32_t;
+using std::uint_fast64_t;
+
+using std::uint_least8_t;
+using std::uint_least16_t;
+using std::uint_least32_t;
+using std::uint_least64_t;
+
+using std::uintmax_t;
+using std::uintptr_t;
+
+using std::size_t;
+
+#endif
+
+} // namespace math
+} // namespace boost
+
+#endif // BOOST_MATH_TOOLS_CSTDINT
diff --git a/include/boost/math/tools/detail/polynomial_horner1_10.hpp b/include/boost/math/tools/detail/polynomial_horner1_10.hpp
index 6876af2d24..04ad90b69b 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_10.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_10.hpp
@@ -12,67 +12,67 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_11.hpp b/include/boost/math/tools/detail/polynomial_horner1_11.hpp index a5154c7a68..f99ab82507 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_11.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_11.hpp @@ -12,73 +12,73 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V 
evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_12.hpp b/include/boost/math/tools/detail/polynomial_horner1_12.hpp index 82bf88c28e..3006ebe51e 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_12.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_12.hpp @@ -12,79 +12,79 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { 
return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_13.hpp b/include/boost/math/tools/detail/polynomial_horner1_13.hpp index f61c553dd9..0f11189097 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_13.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_13.hpp @@ -12,85 +12,85 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_14.hpp b/include/boost/math/tools/detail/polynomial_horner1_14.hpp index 76e9f07b25..caba4b97ea 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_14.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_14.hpp @@ -12,91 +12,91 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return 
diff --git a/include/boost/math/tools/detail/polynomial_horner1_15.hpp b/include/boost/math/tools/detail/polynomial_horner1_15.hpp
index bca8cf7241..c8f42ac813 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_15.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_15.hpp
@@ -12,97 +12,97 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); }
diff --git a/include/boost/math/tools/detail/polynomial_horner1_16.hpp b/include/boost/math/tools/detail/polynomial_horner1_16.hpp
index 16ddb081dd..2ed591ccf5 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_16.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_16.hpp
@@ -12,103 +12,103 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); }
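These detail overloads are not called directly: they sit behind boost::math::tools::evaluate_polynomial, which dispatches to the unrolled variant matching the coefficient count at compile time (which polynomial_horner*_N.hpp family gets included is controlled by BOOST_MATH_POLY_METHOD). A minimal usage sketch, with an illustrative coefficient array (the values are made up for the example):

#include <boost/math/tools/rational.hpp>   // declares evaluate_polynomial
#include <iostream>

int main()
{
   // Coefficients in increasing order of degree: 2x^3 - 3x^2 + 1
   static const double c[4] = { 1.0, 0.0, -3.0, 2.0 };
   // The array extent (4) selects the count-4 unrolled overload.
   std::cout << boost::math::tools::evaluate_polynomial(c, 0.5) << '\n';   // prints 0.5
}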
diff --git a/include/boost/math/tools/detail/polynomial_horner1_17.hpp b/include/boost/math/tools/detail/polynomial_horner1_17.hpp
index 5828621fb8..5e9fc8cd7c 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_17.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_17.hpp
@@ -12,109 +12,109 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); }
diff --git a/include/boost/math/tools/detail/polynomial_horner1_18.hpp b/include/boost/math/tools/detail/polynomial_horner1_18.hpp
index a2a1c12f4c..ffb62ff049 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_18.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_18.hpp
@@ -12,115 +12,115 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); }
diff --git a/include/boost/math/tools/detail/polynomial_horner1_19.hpp b/include/boost/math/tools/detail/polynomial_horner1_19.hpp
index 83ede26b5a..56df108ac8 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_19.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_19.hpp
@@ -12,121 +12,121 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>((((((((((((((((((a[18] * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); }
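Every hunk in these generated headers follows the same shape: the unused pointer-to-integral_constant parameter exists only so overload resolution can pick, at compile time, the version unrolled for exactly N coefficients. A self-contained sketch of that tag-dispatch idiom (simplified names, not the library's actual code):

#include <cstddef>
#include <type_traits>
#include <iostream>

// One overload per coefficient count; the tag parameter carries the count.
template <class T, class V>
inline V eval_imp(const T* a, const V& x, const std::integral_constant<int, 2>*)
{ return static_cast<V>(a[1] * x + a[0]); }

template <class T, class V>
inline V eval_imp(const T* a, const V& x, const std::integral_constant<int, 3>*)
{ return static_cast<V>((a[2] * x + a[1]) * x + a[0]); }

// The array extent N becomes the tag, so the right overload is chosen
// with no runtime branching.
template <std::size_t N, class T, class V>
inline V eval(const T (&a)[N], const V& x)
{
   return eval_imp(static_cast<const T*>(a), x,
                   static_cast<const std::integral_constant<int, N>*>(nullptr));
}

int main()
{
   double c[3] = { 1.0, 2.0, 3.0 };   // 3x^2 + 2x + 1
   std::cout << eval(c, 2.0) << '\n'; // prints 17
}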
diff --git a/include/boost/math/tools/detail/polynomial_horner1_2.hpp b/include/boost/math/tools/detail/polynomial_horner1_2.hpp
index 93d0f7c9c8..63091ebddd 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_2.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_2.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(a[0]); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(a[1] * x + a[0]); }
diff --git a/include/boost/math/tools/detail/polynomial_horner1_20.hpp b/include/boost/math/tools/detail/polynomial_horner1_20.hpp
index d770209113..c16e5143ec 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_20.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_20.hpp
@@ -12,127 +12,127 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(((((((((((((((((((a[19] * x + a[18]) * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); }
diff --git a/include/boost/math/tools/detail/polynomial_horner1_3.hpp b/include/boost/math/tools/detail/polynomial_horner1_3.hpp
index 0fde1a7430..0aeccc1115 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_3.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_3.hpp
@@ -12,25 +12,25 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(0); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(a[0]); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>(a[1] * x + a[0]); }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 { return static_cast<V>((a[2] * x + a[1]) * x + a[0]); }
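The two substitutions repeated through every hunk, adding BOOST_MATH_GPU_ENABLED to each signature and replacing std::integral_constant with boost::math::integral_constant, are what let these headers compile as device code: under NVRTC in particular the host standard library is unavailable, so the library carries its own integral_constant. As a rough sketch of the idea only (the real definitions live in Boost.Math's config and type-traits headers and also cover SYCL):

// Illustrative approximation, not the library's actual configuration logic.
#if defined(__CUDACC__) || defined(__CUDACC_RTC__)
#  define BOOST_MATH_GPU_ENABLED __host__ __device__
#else
#  define BOOST_MATH_GPU_ENABLED
#endif

namespace boost { namespace math {
// Standalone replacement so device code need not pull in <type_traits>.
template <class T, T v>
struct integral_constant
{
   static constexpr T value = v;
   using value_type = T;
   BOOST_MATH_GPU_ENABLED constexpr operator value_type() const noexcept { return value; }
};
}} // namespace boost::math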
const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_5.hpp b/include/boost/math/tools/detail/polynomial_horner1_5.hpp index 64dc00251d..47021bc509 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_5.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_5.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_6.hpp b/include/boost/math/tools/detail/polynomial_horner1_6.hpp index dbc06347f3..bfd24371d5 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_6.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_6.hpp @@ -12,43 +12,43 @@ namespace boost{ namespace math{ namespace tools{ namespace 
detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner1_7.hpp b/include/boost/math/tools/detail/polynomial_horner1_7.hpp index 1472b2ede0..50ddca63ff 100644 --- a/include/boost/math/tools/detail/polynomial_horner1_7.hpp +++ b/include/boost/math/tools/detail/polynomial_horner1_7.hpp @@ -12,49 +12,49 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V 
diff --git a/include/boost/math/tools/detail/polynomial_horner1_7.hpp b/include/boost/math/tools/detail/polynomial_horner1_7.hpp
index 1472b2ede0..50ddca63ff 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_7.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_7.hpp
@@ -12,49 +12,49 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

diff --git a/include/boost/math/tools/detail/polynomial_horner1_8.hpp b/include/boost/math/tools/detail/polynomial_horner1_8.hpp
index 95edfa0c60..3be7ba4d16 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_8.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_8.hpp
@@ -12,55 +12,55 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

diff --git a/include/boost/math/tools/detail/polynomial_horner1_9.hpp b/include/boost/math/tools/detail/polynomial_horner1_9.hpp
index f434a26c4b..4ec53c48bd 100644
--- a/include/boost/math/tools/detail/polynomial_horner1_9.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner1_9.hpp
@@ -12,61 +12,61 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]);
 }
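The polynomial_horner2_N headers that follow use a second-order Horner scheme: the coefficients are split into even-index and odd-index groups, each evaluated as a Horner chain in x*x, so two shorter dependency chains can overlap in the pipeline instead of one long serial chain. A generic sketch of the same recurrence (illustrative only; the generated headers interleave the two chains explicitly per degree):

#include <cstddef>

// Second-order Horner: p(x) = E(x*x) + x * O(x*x), where E holds the
// even-index coefficients and O the odd-index ones.
template <class T, class V, std::size_t N>
V horner2_sketch(const T (&a)[N], V x)
{
   V x2 = x * x;
   V even = static_cast<V>(0);
   V odd = static_cast<V>(0);
   for (std::size_t i = N; i-- > 0;)
   {
      if (i % 2 == 0)
         even = even * x2 + a[i];
      else
         odd = odd * x2 + a[i];
   }
   return static_cast<V>(even + odd * x);
}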
diff --git a/include/boost/math/tools/detail/polynomial_horner2_10.hpp b/include/boost/math/tools/detail/polynomial_horner2_10.hpp
index 1fce239a47..f242d7464e 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_10.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_10.hpp
@@ -12,72 +12,72 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

diff --git a/include/boost/math/tools/detail/polynomial_horner2_11.hpp b/include/boost/math/tools/detail/polynomial_horner2_11.hpp
index 3cf086c3b1..edf7f86c52 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_11.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_11.hpp
@@ -12,79 +12,79 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }
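Throughout the patch every overload gains a BOOST_MATH_GPU_ENABLED prefix. The macro's definition is not shown in these hunks; a plausible shape, assuming the usual CUDA convention for host/device-callable headers, would be the following hypothetical sketch (not the library's actual config):

// Hypothetical: mark functions callable from both host and device code
// under nvcc, and compile away to nothing for ordinary CPU builds.
#ifdef __CUDACC__
#  define BOOST_MATH_GPU_ENABLED __host__ __device__
#else
#  define BOOST_MATH_GPU_ENABLED
#endif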
diff --git a/include/boost/math/tools/detail/polynomial_horner2_12.hpp b/include/boost/math/tools/detail/polynomial_horner2_12.hpp
index e9f8eae7c6..969c9c4ddd 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_12.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_12.hpp
@@ -12,86 +12,86 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

diff --git a/include/boost/math/tools/detail/polynomial_horner2_13.hpp b/include/boost/math/tools/detail/polynomial_horner2_13.hpp
index d9d2a5e24a..ed4559d11e 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_13.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_13.hpp
@@ -12,93 +12,93 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

diff --git a/include/boost/math/tools/detail/polynomial_horner2_14.hpp b/include/boost/math/tools/detail/polynomial_horner2_14.hpp
index b4280597a8..4b79eb78a4 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_14.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_14.hpp
@@ -12,100 +12,100 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }
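The other systematic change in every hunk is std::integral_constant becoming boost::math::integral_constant. The replacement type is presumably a drop-in clone that stays usable where the standard <type_traits> header is unavailable (for example under NVRTC). A minimal sketch of that shape, assuming it mirrors the standard template (the actual definition lives in the library's GPU-support headers, not in this patch):

namespace boost { namespace math {

// Assumed drop-in replacement for std::integral_constant.
template <class T, T v>
struct integral_constant
{
   static constexpr T value = v;
   using value_type = T;
   using type = integral_constant;
   constexpr operator value_type() const noexcept { return value; }
};

}} // namespace boost::math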
diff --git a/include/boost/math/tools/detail/polynomial_horner2_15.hpp b/include/boost/math/tools/detail/polynomial_horner2_15.hpp
index 89a7a46f53..28b62eee75 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_15.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_15.hpp
@@ -12,107 +12,107 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

diff --git a/include/boost/math/tools/detail/polynomial_horner2_16.hpp b/include/boost/math/tools/detail/polynomial_horner2_16.hpp
index d2379d2bc1..6368b40548 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_16.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_16.hpp
@@ -12,114 +12,114 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }
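In every overload the final, otherwise-unused const integral_constant<int, N>* parameter carries the coefficient count in its type, so overload resolution selects the matching unrolled body at compile time with no runtime cost. A hypothetical caller showing that dispatch (every name other than evaluate_polynomial_c_imp and boost::math::integral_constant is illustrative):

// Tag dispatch sketch: encode the coefficient count N in a type and pass
// a null tag pointer; the compiler picks the unrolled overload for N.
template <class T, class V, int N>
V evaluate_fixed_sketch(const T* a, const V& x)
{
   using tag = boost::math::integral_constant<int, N>;
   return evaluate_polynomial_c_imp(a, x, static_cast<const tag*>(nullptr));
}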
diff --git a/include/boost/math/tools/detail/polynomial_horner2_17.hpp b/include/boost/math/tools/detail/polynomial_horner2_17.hpp
index d1921efc49..551e6191cf 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_17.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_17.hpp
@@ -12,121 +12,121 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }
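For context, these detail overloads sit behind the public entry point boost::math::tools::evaluate_polynomial, which dispatches on the coefficient array's size. A small usage example (assuming the usual rational.hpp header provides it, as in current Boost.Math):

#include <boost/math/tools/rational.hpp>

int main()
{
   // p(x) = 1 + 2x + 3x^2, evaluated at x = 0.5 via the unrolled path.
   static const double c[3] = { 1.0, 2.0, 3.0 };
   double y = boost::math::tools::evaluate_polynomial(c, 0.5);
   return y == 2.75 ? 0 : 1;
}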
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_19.hpp b/include/boost/math/tools/detail/polynomial_horner2_19.hpp
index a3049354ca..9ea87fd93b 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_19.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_19.hpp
@@ -12,135 +12,135 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((((a[18] * x2 + a[16]) * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_2.hpp b/include/boost/math/tools/detail/polynomial_horner2_2.hpp
index 8b3a7dcd83..1982a81f3f 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_2.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_2.hpp
@@ -12,31 +12,31 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_20.hpp b/include/boost/math/tools/detail/polynomial_horner2_20.hpp
index a4ccc93b3e..23afe55e05 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_20.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_20.hpp
@@ -12,142 +12,142 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((a[9] * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((a[10] * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((a[11] * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((a[12] * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((a[13] * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((a[14] * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((((((a[15] * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + (((((((a[16] * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((((((((a[18] * x2 + a[16]) * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((((((((a[17] * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((((((((a[19] * x2 + a[17]) * x2 + a[15]) * x2 + a[13]) * x2 + a[11]) * x2 + a[9]) * x2 + a[7]) * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((((((((a[18] * x2 + a[16]) * x2 + a[14]) * x2 + a[12]) * x2 + a[10]) * x2 + a[8]) * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_3.hpp b/include/boost/math/tools/detail/polynomial_horner2_3.hpp
index d0b988cf81..f9d6953b82 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_3.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_3.hpp
@@ -12,31 +12,31 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_4.hpp b/include/boost/math/tools/detail/polynomial_horner2_4.hpp
index 7f0708680c..8f11de5b31 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_4.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_4.hpp
@@ -12,31 +12,31 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_5.hpp b/include/boost/math/tools/detail/polynomial_horner2_5.hpp
index f4e7b809b0..eba9ee9e6d 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_5.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_5.hpp
@@ -12,37 +12,37 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_6.hpp b/include/boost/math/tools/detail/polynomial_horner2_6.hpp
index 764e522505..ef77c6255b 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_6.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_6.hpp
@@ -12,44 +12,44 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_7.hpp b/include/boost/math/tools/detail/polynomial_horner2_7.hpp
index 50fb3333cb..fe8d21b95f 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_7.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_7.hpp
@@ -12,51 +12,51 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_8.hpp b/include/boost/math/tools/detail/polynomial_horner2_8.hpp
index c74b19d435..de1810a940 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_8.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_8.hpp
@@ -12,58 +12,58 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner2_9.hpp b/include/boost/math/tools/detail/polynomial_horner2_9.hpp
index 7d6e7e421f..5c53b73299 100644
--- a/include/boost/math/tools/detail/polynomial_horner2_9.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner2_9.hpp
@@ -12,65 +12,65 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((a[4] * x2 + a[2]) * x2 + a[0] + (a[3] * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[5] * x2 + a[3]) * x2 + a[1]) * x + (a[4] * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>(((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + ((a[5] * x2 + a[3]) * x2 + a[1]) * x);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x + ((a[6] * x2 + a[4]) * x2 + a[2]) * x2 + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    return static_cast<V>((((a[8] * x2 + a[6]) * x2 + a[4]) * x2 + a[2]) * x2 + a[0] + (((a[7] * x2 + a[5]) * x2 + a[3]) * x2 + a[1]) * x);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner3_10.hpp b/include/boost/math/tools/detail/polynomial_horner3_10.hpp
index b980b1b3d2..7fb5bb4745 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_10.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_10.hpp
@@ -12,37 +12,37 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_11.hpp b/include/boost/math/tools/detail/polynomial_horner3_11.hpp
index 2ab4b2ac3a..9f22820dea 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_11.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_11.hpp
@@ -12,37 +12,37 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }
 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_12.hpp b/include/boost/math/tools/detail/polynomial_horner3_12.hpp
index 4606427277..b049613766 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_12.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_12.hpp
@@ -12,37 +12,37 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_13.hpp b/include/boost/math/tools/detail/polynomial_horner3_13.hpp
index d35fa904f2..f39a33cc90 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_13.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_13.hpp
@@ -12,37 +12,37 @@
 namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_14.hpp b/include/boost/math/tools/detail/polynomial_horner3_14.hpp
index 346b9dc28e..32b9e7db29 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_14.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_14.hpp
@@ -12,37 +12,37 @@
namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } 
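// The hunks above and below all apply one mechanical substitution to these generated
// Horner headers. Written out with the angle-bracket arguments restored (a sketch,
// assuming the usual template <class T, class V> parameter list of these headers and a
// coefficient-count tag N that runs 0..max per file), each change is:
//
//    template <class T, class V>
//   -inline V evaluate_polynomial_c_imp(const T* a, const V& x,
//   -   const std::integral_constant<int, N>*) BOOST_MATH_NOEXCEPT(V)
//   +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x,
//   +   const boost::math::integral_constant<int, N>*) BOOST_MATH_NOEXCEPT(V)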
template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_15.hpp b/include/boost/math/tools/detail/polynomial_horner3_15.hpp index 500bc32317..55325c84b9 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_15.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_15.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const 
std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) 
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_16.hpp b/include/boost/math/tools/detail/polynomial_horner3_16.hpp index 269f367390..f71d62f50c 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_16.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_16.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V 
x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 
= x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_17.hpp b/include/boost/math/tools/detail/polynomial_horner3_17.hpp index 1d97a6f154..783a34558c 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_17.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_17.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -330,7 +330,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_18.hpp b/include/boost/math/tools/detail/polynomial_horner3_18.hpp index 80e49cbb12..b10b270c41 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_18.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_18.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -330,7 +330,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V 
evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -367,7 +367,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_19.hpp b/include/boost/math/tools/detail/polynomial_horner3_19.hpp index eae3775e06..21147591c8 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_19.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_19.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 
@@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -330,7 +330,7 
@@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -367,7 +367,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -406,7 +406,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; diff --git a/include/boost/math/tools/detail/polynomial_horner3_2.hpp b/include/boost/math/tools/detail/polynomial_horner3_2.hpp index 6281674205..ee3e35e6ca 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_2.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_2.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } diff --git a/include/boost/math/tools/detail/polynomial_horner3_20.hpp b/include/boost/math/tools/detail/polynomial_horner3_20.hpp index 00f8caae2a..338aeb7dbc 100644 --- a/include/boost/math/tools/detail/polynomial_horner3_20.hpp +++ b/include/boost/math/tools/detail/polynomial_horner3_20.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_polynomial_c_imp(const T*, const V&, const 
std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[1] * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[2] * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[3] * x + a[2]) * x + a[1]) * x + a[0]); } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -127,7 +127,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) 
BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -150,7 +150,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -175,7 +175,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -202,7 +202,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -231,7 +231,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -262,7 +262,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -295,7 +295,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -330,7 +330,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -367,7 +367,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { V x2 = x * x; V t[2]; @@ -406,7 +406,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c } template -inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) 
BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -447,7 +447,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_3.hpp b/include/boost/math/tools/detail/polynomial_horner3_3.hpp
index 8f69c2bfef..1eee0cfac0 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_3.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_3.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner3_4.hpp b/include/boost/math/tools/detail/polynomial_horner3_4.hpp
index 34db812343..efa7fba485 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_4.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_4.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }
diff --git a/include/boost/math/tools/detail/polynomial_horner3_5.hpp b/include/boost/math/tools/detail/polynomial_horner3_5.hpp
index ed955e4a70..f150e2a4a4 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_5.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_5.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_6.hpp b/include/boost/math/tools/detail/polynomial_horner3_6.hpp
index 96d9a6ddad..fe679e74d2 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_6.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_6.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_7.hpp b/include/boost/math/tools/detail/polynomial_horner3_7.hpp
index 80a9f3af4a..76f080ad9c 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_7.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_7.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_8.hpp b/include/boost/math/tools/detail/polynomial_horner3_8.hpp
index ee526ad736..75634bdfc6 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_8.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_8.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
diff --git a/include/boost/math/tools/detail/polynomial_horner3_9.hpp b/include/boost/math/tools/detail/polynomial_horner3_9.hpp
index a17ce909c7..63a40580d1 100644
--- a/include/boost/math/tools/detail/polynomial_horner3_9.hpp
+++ b/include/boost/math/tools/detail/polynomial_horner3_9.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[1] * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -55,7 +55,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -70,7 +70,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -87,7 +87,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
@@ -106,7 +106,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_c
 }

 template <class T, class V>
-inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    V x2 = x * x;
    V t[2];
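Every hunk above and below applies the same mechanical edit to Boost.Math's generated Horner-evaluation headers: prefix each detail overload with BOOST_MATH_GPU_ENABLED and swap the std::integral_constant dispatch tag for boost::math::integral_constant. A minimal C++ sketch of the pattern on one overload follows, with the template parameter lists and tag arguments written out in full; the macro expansions and the boost::math::integral_constant definition shown here are illustrative assumptions about the supporting headers, not part of this diff:

    // Sketch only: assumed stand-ins for Boost.Math's real support headers.
    #include <type_traits>

    #ifdef __CUDACC__
    #  define BOOST_MATH_GPU_ENABLED __host__ __device__  // assumed expansion under CUDA
    #else
    #  define BOOST_MATH_GPU_ENABLED                      // host-only builds: expands to nothing
    #endif
    #define BOOST_MATH_NOEXCEPT(T) noexcept(std::is_floating_point<T>::value)

    namespace boost { namespace math {
    // Assumed GPU-safe mirror of std::integral_constant, usable in device code.
    template <class T, T v>
    struct integral_constant { static constexpr T value = v; };
    }} // namespace boost::math

    namespace boost { namespace math { namespace tools { namespace detail {
    // Post-patch form of the three-coefficient overload: the Horner body is
    // unchanged; the function is now callable from both host and device.
    template <class T, class V>
    BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x,
       const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
    {
       return static_cast<V>((a[2] * x + a[1]) * x + a[0]);
    }
    }}}} // namespace boost::math::tools::detail

The int parameter of the tag is the coefficient count, so each fixed polynomial size dispatches at compile time to a fully unrolled overload; the rational_horner headers that follow use the same tag with an extra denominator-coefficient pointer.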
diff --git a/include/boost/math/tools/detail/rational_horner1_10.hpp b/include/boost/math/tools/detail/rational_horner1_10.hpp
index 6a04128ca6..e2f6c6d2fb 100644
--- a/include/boost/math/tools/detail/rational_horner1_10.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_10.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_11.hpp b/include/boost/math/tools/detail/rational_horner1_11.hpp
index d43e53433f..31d480a65a 100644
--- a/include/boost/math/tools/detail/rational_horner1_11.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_11.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_12.hpp b/include/boost/math/tools/detail/rational_horner1_12.hpp
index 33d19eb380..c08a85b3a6 100644
--- a/include/boost/math/tools/detail/rational_horner1_12.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_12.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_13.hpp b/include/boost/math/tools/detail/rational_horner1_13.hpp
index 2069aa5150..cc87ec2dc7 100644
--- a/include/boost/math/tools/detail/rational_horner1_13.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_13.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_14.hpp b/include/boost/math/tools/detail/rational_horner1_14.hpp
index 5ebcde6260..256473710f 100644
--- a/include/boost/math/tools/detail/rational_horner1_14.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_14.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_15.hpp b/include/boost/math/tools/detail/rational_horner1_15.hpp
index 9da8e1b711..2ab24814e7 100644
--- a/include/boost/math/tools/detail/rational_horner1_15.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_15.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_16.hpp b/include/boost/math/tools/detail/rational_horner1_16.hpp
index 203ba78196..dce0b5e9b1 100644
--- a/include/boost/math/tools/detail/rational_horner1_16.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_16.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x +
b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + 
a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_17.hpp b/include/boost/math/tools/detail/rational_horner1_17.hpp index e382d2931a..8e875d6576 100644 --- a/include/boost/math/tools/detail/rational_horner1_17.hpp +++ b/include/boost/math/tools/detail/rational_horner1_17.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, 
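Editorial note: every generated header in this diff follows one pattern. evaluate_rational_c_imp is overloaded on an integral_constant<int, N> tag, where N is the coefficient count, and each overload evaluates the numerator and denominator polynomials with a fully hand-unrolled Horner scheme. A minimal loop-based sketch of the same idea (illustrative names, not the library's generated code):

// Sketch only: a loop-based equivalent of the unrolled overloads above.
#include <array>
#include <cstddef>

template <class V, std::size_t N>
constexpr V horner(const std::array<V, N>& c, V x)
{
   V result = c[N - 1];
   for (std::size_t i = N - 1; i > 0; --i)
      result = result * x + c[i - 1];   // ((c[N-1]*x + c[N-2])*x + ...)*x + c[0]
   return result;
}

template <class V, std::size_t N>
constexpr V eval_rational(const std::array<V, N>& a, const std::array<V, N>& b, V x)
{
   return horner(a, x) / horner(b, x);  // numerator / denominator
}

The generated files unroll this by hand for each N so that no loop or recursion survives into the object code, which is also why the diff touches one near-identical function per degree.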
diff --git a/include/boost/math/tools/detail/rational_horner1_17.hpp b/include/boost/math/tools/detail/rational_horner1_17.hpp
index e382d2931a..8e875d6576 100644
--- a/include/boost/math/tools/detail/rational_horner1_17.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_17.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -204,7 +204,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((b[16] * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
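The if((-1 <= x) && (x <= 1)) guard that opens every overload matters for stability: each hunk here shows only the in-range branch, while the unchanged (and therefore elided) else branch evaluates in z = 1/x with the coefficient order reversed, so large |x| cannot blow up the partial sums. A hedged sketch of that two-branch shape for the three-coefficient case (illustrative, not the generated text):

// Sketch of the two-branch evaluation; the else branch is what the
// truncated hunk context above omits.
template <class V>
V eval_rational_3(const V* a, const V* b, V x)
{
   if ((-1 <= x) && (x <= 1))
      return ((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]);
   V z = 1 / x;                             // evaluate in 1/x for |x| > 1
   return ((a[0] * z + a[1]) * z + a[2]) / ((b[0] * z + b[1]) * z + b[2]);
}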
diff --git a/include/boost/math/tools/detail/rational_horner1_18.hpp b/include/boost/math/tools/detail/rational_horner1_18.hpp
index 66f668ee35..ab67a970b0 100644
--- a/include/boost/math/tools/detail/rational_horner1_18.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_18.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -204,7 +204,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((b[16] * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -216,7 +216,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((((b[17] * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
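Every hunk in this diff applies the same two mechanical substitutions: prefix the function with BOOST_MATH_GPU_ENABLED, and swap std::integral_constant for boost::math::integral_constant. Roughly, the shims behind those substitutions look like the following (an assumed shape for illustration only; the authoritative definitions live in the library's GPU-support headers):

// Assumed shape of the portability shims, not the library's exact source.
#ifdef __CUDACC__
#  define BOOST_MATH_GPU_ENABLED __host__ __device__   // callable from host and device
#else
#  define BOOST_MATH_GPU_ENABLED                       // expands to nothing on CPU builds
#endif

namespace boost { namespace math {
// Mirror of std::integral_constant so device code need not pull in <type_traits>.
template <class T, T v>
struct integral_constant
{
   static constexpr T value = v;
   using value_type = T;
   constexpr operator value_type() const noexcept { return value; }
};
}} // namespace boost::math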
diff --git a/include/boost/math/tools/detail/rational_horner1_19.hpp b/include/boost/math/tools/detail/rational_horner1_19.hpp
index 9cd1391434..dc300343a5 100644
--- a/include/boost/math/tools/detail/rational_horner1_19.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_19.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -204,7 +204,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((b[16] * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -216,7 +216,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((((b[17] * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -228,7 +228,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((((((a[18] * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((((b[18] * x + b[17]) * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
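For context, callers never touch these detail overloads directly: the public entry point boost::math::tools::evaluate_rational deduces the coefficient-array extent and dispatches to the matching unrolled specialization. A small usage sketch (placeholder coefficients, not a real approximation):

#include <boost/math/tools/rational.hpp>

double approx(double x)
{
   static const double num[4]   = { 1.0, 0.5, 0.25, 0.125 };
   static const double denom[4] = { 1.0, 0.3, 0.2,  0.1   };
   // The array extent (4) selects the integral_constant<int, 4> overload above.
   return boost::math::tools::evaluate_rational(num, denom, x);
}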
evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); diff --git a/include/boost/math/tools/detail/rational_horner1_20.hpp b/include/boost/math/tools/detail/rational_horner1_20.hpp index 0a6c2a0f26..5b8b170c15 100644 --- a/include/boost/math/tools/detail/rational_horner1_20.hpp +++ b/include/boost/math/tools/detail/rational_horner1_20.hpp @@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); @@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); @@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); @@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -72,7 +72,7 @@ inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -120,7 +120,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) return static_cast((((((((((a[9] * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((b[9] * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0])); @@ -132,7 +132,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 
1))
       return static_cast<V>((((((((((a[10] * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((b[10] * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -144,7 +144,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((a[11] * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((b[11] * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -156,7 +156,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((a[12] * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((b[12] * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -168,7 +168,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((a[13] * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((b[13] * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((a[14] * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((b[14] * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -192,7 +192,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((a[15] * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((b[15] * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -204,7 +204,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((((((((((a[16] * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((b[16] * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -216,7 +216,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((((a[17] * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((((b[17] * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -228,7 +228,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((((((a[18] * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((((((((((((b[18] * x + b[17]) * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -240,7 +240,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((((((((((((((a[19] * x + a[18]) * x + a[17]) * x + a[16]) * x + a[15]) * x + a[14]) * x + a[13]) * x + a[12]) * x + a[11]) * x + a[10]) * x + a[9]) * x + a[8]) * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((((((((((((((b[19] * x + b[18]) * x + b[17]) * x + b[16]) * x + b[15]) * x + b[14]) * x + b[13]) * x + b[12]) * x + b[11]) * x + b[10]) * x + b[9]) * x + b[8]) * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_3.hpp b/include/boost/math/tools/detail/rational_horner1_3.hpp
index d0ab213b3c..6933e22bf1 100644
--- a/include/boost/math/tools/detail/rational_horner1_3.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_3.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
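The change repeated through these generated headers is mechanical: every fixed-size evaluate_rational_c_imp overload gains the BOOST_MATH_GPU_ENABLED decoration and takes its dispatch tag from boost::math::integral_constant instead of std::integral_constant, so the whole overload set can be instantiated inside device code. A minimal sketch of the size-tag dispatch these overloads implement (names simplified, not the literal library source; BOOST_MATH_GPU_ENABLED is stubbed out for a host-only build):

#ifndef BOOST_MATH_GPU_ENABLED
#  define BOOST_MATH_GPU_ENABLED   // host-only stub; CUDA builds would use __host__ __device__
#endif
#include <cstddef>

template <int N> struct size_tag {};   // stand-in for boost::math::integral_constant<int, N>

template <class T, class V>
BOOST_MATH_GPU_ENABLED inline V rational_imp(const T* a, const T* b, const V&, const size_tag<1>*)
{
   return static_cast<V>(a[0]) / static_cast<V>(b[0]);
}

template <class T, class V>
BOOST_MATH_GPU_ENABLED inline V rational_imp(const T* a, const T* b, const V& x, const size_tag<2>*)
{
   return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
}

// The caller selects the unrolled overload at compile time from the array length:
template <std::size_t N, class T, class V>
BOOST_MATH_GPU_ENABLED inline V rational(const T (&a)[N], const T (&b)[N], const V& x)
{
   return rational_imp(a, b, x, static_cast<const size_tag<(int)N>*>(nullptr));
}

int main()
{
   const double p[2] = {1.0, 2.0}, q[2] = {3.0, 4.0};
   return rational(p, q, 0.5) > 0 ? 0 : 1;   // (1 + 2x)/(3 + 4x) at x = 0.5
}

Because the tag argument is a null pointer to a distinct type per size, the dispatch costs nothing at run time; the patch only has to change the type the tag is spelled in.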
diff --git a/include/boost/math/tools/detail/rational_horner1_4.hpp b/include/boost/math/tools/detail/rational_horner1_4.hpp
index 44f40114a1..49b9835778 100644
--- a/include/boost/math/tools/detail/rational_horner1_4.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_4.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_5.hpp b/include/boost/math/tools/detail/rational_horner1_5.hpp
index db032f15e1..91e97ff445 100644
--- a/include/boost/math/tools/detail/rational_horner1_5.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_5.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_6.hpp b/include/boost/math/tools/detail/rational_horner1_6.hpp
index 4de5143ca9..876b026cde 100644
--- a/include/boost/math/tools/detail/rational_horner1_6.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_6.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
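Each unrolled body guards with if((-1 <= x) && (x <= 1)) and evaluates the two Horner chains directly only on that interval. The else branches fall outside these hunks, but the standard trick they apply is to evaluate in z = 1/x with the coefficient order reversed, which keeps the intermediate Horner terms bounded for large |x|. A self-contained sketch for the three-coefficient case (illustrative, not the library source):

#ifndef BOOST_MATH_GPU_ENABLED
#  define BOOST_MATH_GPU_ENABLED
#endif

template <class T, class V>
BOOST_MATH_GPU_ENABLED inline V rational_3(const T* a, const T* b, const V& x)
{
   if((-1 <= x) && (x <= 1))
      return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) /
                            ((b[2] * x + b[1]) * x + b[0]));
   // For |x| > 1, P(x)/Q(x) == P*(z)/Q*(z) with z = 1/x and the
   // coefficients reversed, since the common factor x^2 cancels:
   V z = 1 / x;
   return static_cast<V>(((a[0] * z + a[1]) * z + a[2]) /
                         ((b[0] * z + b[1]) * z + b[2]));
}

int main()
{
   const double p[3] = {1.0, 0.0, 1.0}, q[3] = {2.0, 0.0, 1.0};
   // (1 + x^2) / (2 + x^2) at x = 10 -> 101/102, computed via the z = 1/x branch
   return rational_3(p, q, 10.0) > 0.9 ? 0 : 1;
}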
diff --git a/include/boost/math/tools/detail/rational_horner1_7.hpp b/include/boost/math/tools/detail/rational_horner1_7.hpp
index 7d4ef69e9a..bcac18293c 100644
--- a/include/boost/math/tools/detail/rational_horner1_7.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_7.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_8.hpp b/include/boost/math/tools/detail/rational_horner1_8.hpp
index bf4d7f57e3..55e30a53e8 100644
--- a/include/boost/math/tools/detail/rational_horner1_8.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_8.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
diff --git a/include/boost/math/tools/detail/rational_horner1_9.hpp b/include/boost/math/tools/detail/rational_horner1_9.hpp
index cf3be7f824..c7087de508 100644
--- a/include/boost/math/tools/detail/rational_horner1_9.hpp
+++ b/include/boost/math/tools/detail/rational_horner1_9.hpp
@@ -12,19 +12,19 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
@@ -36,7 +36,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
@@ -48,7 +48,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
@@ -60,7 +60,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((a[4] * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((b[4] * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -72,7 +72,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((a[5] * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((b[5] * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -84,7 +84,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((a[6] * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((b[6] * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -96,7 +96,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>((((((((a[7] * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / (((((((b[7] * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
@@ -108,7 +108,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
       return static_cast<V>(((((((((a[8] * x + a[7]) * x + a[6]) * x + a[5]) * x + a[4]) * x + a[3]) * x + a[2]) * x + a[1]) * x + a[0]) / ((((((((b[8] * x + b[7]) * x + b[6]) * x + b[5]) * x + b[4]) * x + b[3]) * x + b[2]) * x + b[1]) * x + b[0]));
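From here the patch moves on to the rational_horner2_*.hpp family. These differ from the horner1 files in evaluation scheme, not interface: my reading (an assumption; the loop bodies fall outside these hunks) is that they use a second-order Horner scheme that advances two independent accumulators by x*x, halving the length of the serial dependency chain on pipelined FPUs. A sketch of the idea for a five-coefficient polynomial:

// Second-order Horner: split P into even and odd parts, P(x) = E(x^2) + x*O(x^2),
// and run the two Horner chains independently. Sketch only, not the library source.
template <class T, class V>
inline V poly5_second_order(const T* a, const V& x)
{
   const V x2 = x * x;
   V even = a[4] * x2 + a[2];   // builds a[4]*x^4 + a[2]*x^2 + a[0]
   even = even * x2 + a[0];
   V odd = a[3] * x2 + a[1];    // builds (a[3]*x^3 + a[1]*x) / x
   return even + odd * x;
}

int main()
{
   const double a[5] = {1.0, 1.0, 1.0, 1.0, 1.0};
   // 1 + x + x^2 + x^3 + x^4 at x = 2 is 31
   return poly5_second_order(a, 2.0) == 31.0 ? 0 : 1;
}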
diff --git a/include/boost/math/tools/detail/rational_horner2_10.hpp b/include/boost/math/tools/detail/rational_horner2_10.hpp
index 1a59aa334c..4d74a714d5 100644
--- a/include/boost/math/tools/detail/rational_horner2_10.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_10.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_11.hpp b/include/boost/math/tools/detail/rational_horner2_11.hpp
index 1333a40bc8..15f1cf2556 100644
--- a/include/boost/math/tools/detail/rational_horner2_11.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_11.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_12.hpp b/include/boost/math/tools/detail/rational_horner2_12.hpp
index a37cf5a05f..24e9d9e7f7 100644
--- a/include/boost/math/tools/detail/rational_horner2_12.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_12.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
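For reference while reading these signatures, the two tokens the patch introduces are defined centrally, not in this diff. The sketch below reflects my assumption of how they are wired up (the authoritative definitions live in the Boost.Math config and type-traits headers):

// Assumed shape of the markup macro: device+host linkage under GPU builds,
// a no-op otherwise.
#if defined(BOOST_MATH_ENABLE_CUDA)
#  define BOOST_MATH_GPU_ENABLED __host__ __device__
#else
#  define BOOST_MATH_GPU_ENABLED
#endif

// Assumed shape of the tag type: a boost::math-owned mirror of
// std::integral_constant, usable where the std:: one is unavailable
// or undesirable in device code.
namespace boost { namespace math {

template <class T, T v>
struct integral_constant
{
   static constexpr T value = v;
   using value_type = T;
   constexpr operator value_type() const noexcept { return value; }
};

}} // namespace boost::math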
diff --git a/include/boost/math/tools/detail/rational_horner2_13.hpp b/include/boost/math/tools/detail/rational_horner2_13.hpp
index 648f3079c5..495f88525d 100644
--- a/include/boost/math/tools/detail/rational_horner2_13.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_13.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_14.hpp b/include/boost/math/tools/detail/rational_horner2_14.hpp
index 7771c3da91..273e723b6c 100644
--- a/include/boost/math/tools/detail/rational_horner2_14.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_14.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
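All of these detail overloads sit behind one public entry point, boost::math::tools::evaluate_rational, which deduces the coefficient count from the array type and forwards to the matching unrolled overload; nothing in this patch changes that interface. Usage sketch (illustrative coefficients only):

#include <boost/math/tools/rational.hpp>
#include <iostream>

int main()
{
   // (1 + 2x + 3x^2) / (4 + 5x + 6x^2)
   static const double num[3] = {1.0, 2.0, 3.0};
   static const double den[3] = {4.0, 5.0, 6.0};
   std::cout << boost::math::tools::evaluate_rational(num, den, 0.5) << '\n';
}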
diff --git a/include/boost/math/tools/detail/rational_horner2_15.hpp b/include/boost/math/tools/detail/rational_horner2_15.hpp
index 03fae0d947..c7e24ec7db 100644
--- a/include/boost/math/tools/detail/rational_horner2_15.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_15.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_16.hpp b/include/boost/math/tools/detail/rational_horner2_16.hpp
index d8565e104b..2eebd702bc 100644
--- a/include/boost/math/tools/detail/rational_horner2_16.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_16.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner2_17.hpp b/include/boost/math/tools/detail/rational_horner2_17.hpp index bd8990e0c2..1fee63047f 100644 --- a/include/boost/math/tools/detail/rational_horner2_17.hpp +++ b/include/boost/math/tools/detail/rational_horner2_17.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) 
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -234,7 +234,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner2_18.hpp 
b/include/boost/math/tools/detail/rational_horner2_18.hpp index 38b99ecf17..7aedbf2aad 100644 --- a/include/boost/math/tools/detail/rational_horner2_18.hpp +++ b/include/boost/math/tools/detail/rational_horner2_18.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& 
x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -234,7 +234,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, 
const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -250,7 +250,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner2_19.hpp b/include/boost/math/tools/detail/rational_horner2_19.hpp index b77d2eb0b9..1c36a267cb 100644 --- a/include/boost/math/tools/detail/rational_horner2_19.hpp +++ b/include/boost/math/tools/detail/rational_horner2_19.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } 
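
[Editorial sketch] Every hunk in these generated headers makes the same two mechanical changes: the overload gains a BOOST_MATH_GPU_ENABLED prefix, and its std::integral_constant tag parameter becomes boost::math::integral_constant, so the compile-time dispatch keeps working on device targets where the C++ standard library is unavailable (NVRTC in particular). The dispatch pattern itself is easiest to see in isolation; the sketch below uses hypothetical names (eval_rational, eval_rational_imp) and only two overloads, not the library's real interface:

#include <cstddef>
#include <type_traits>
#include <iostream>

// One overload per coefficient count; the unused tag-pointer parameter
// exists only to steer overload resolution at compile time.
template <class T, class V>
inline V eval_rational_imp(const T* a, const T* b, const V& x,
                           const std::integral_constant<int, 2>*)
{
   return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
}

template <class T, class V>
inline V eval_rational_imp(const T* a, const T* b, const V& x,
                           const std::integral_constant<int, 3>*)
{
   return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
}

// The array size N selects the fully unrolled implementation above.
template <class T, std::size_t N, class V>
inline V eval_rational(const T (&a)[N], const T (&b)[N], const V& x)
{
   using tag = std::integral_constant<int, static_cast<int>(N)>;
   return eval_rational_imp(a, b, x, static_cast<const tag*>(nullptr));
}

int main()
{
   double num[3] = { 1.0, 2.0, 3.0 }; // 1 + 2x + 3x^2
   double den[3] = { 1.0, 1.0, 1.0 }; // 1 + x  +  x^2
   std::cout << eval_rational(num, den, 0.5) << '\n'; // degree chosen at compile time
}

Passing a null tag pointer rather than a tag object is a long-standing Boost idiom: only the static type of the argument matters, so nothing is constructed or copied at the call site.
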
template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -234,7 +234,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -250,7 +250,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -266,7 +266,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner2_2.hpp b/include/boost/math/tools/detail/rational_horner2_2.hpp index 9c4fe47a74..bb2e2c4dcf 100644 --- a/include/boost/math/tools/detail/rational_horner2_2.hpp +++ b/include/boost/math/tools/detail/rational_horner2_2.hpp @@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V 
evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } diff --git a/include/boost/math/tools/detail/rational_horner2_20.hpp b/include/boost/math/tools/detail/rational_horner2_20.hpp index 485639dcef..a591b901c9 100644 --- a/include/boost/math/tools/detail/rational_horner2_20.hpp +++ b/include/boost/math/tools/detail/rational_horner2_20.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* 
a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -122,7 +122,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -138,7 +138,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -154,7 +154,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -170,7 +170,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -186,7 +186,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -202,7 +202,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) 
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -218,7 +218,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -234,7 +234,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -250,7 +250,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -266,7 +266,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -282,7 +282,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_3.hpp b/include/boost/math/tools/detail/rational_horner2_3.hpp
index d19993cce1..0b410d8bbe 100644
--- a/include/boost/math/tools/detail/rational_horner2_3.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_3.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
diff --git a/include/boost/math/tools/detail/rational_horner2_4.hpp b/include/boost/math/tools/detail/rational_horner2_4.hpp
index 847f26dc4e..07a9a2c5ad 100644
--- a/include/boost/math/tools/detail/rational_horner2_4.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_4.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
diff --git a/include/boost/math/tools/detail/rational_horner2_5.hpp b/include/boost/math/tools/detail/rational_horner2_5.hpp
index 8633d5dc13..0933ddfbc4 100644
--- a/include/boost/math/tools/detail/rational_horner2_5.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_5.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{

 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
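
[Editorial sketch] The two identifiers these hunks introduce are small utilities. What follows is an assumption-labeled approximation for orientation only, not the library's actual definitions (those live in Boost.Math's config and type-traits headers): BOOST_MATH_GPU_ENABLED presumably expands to __host__ __device__ under a CUDA compiler and to nothing for ordinary host builds, and boost::math::integral_constant mirrors std::integral_constant so the tag dispatch no longer depends on <type_traits> being available:

// Hypothetical approximation of the real definitions -- for illustration only.
#ifdef __CUDACC__
#  define BOOST_MATH_GPU_ENABLED __host__ __device__
#else
#  define BOOST_MATH_GPU_ENABLED
#endif

namespace boost { namespace math {

// Drop-in stand-in for std::integral_constant, usable where the
// C++ standard library is absent (e.g. when compiling under NVRTC).
template <class T, T v>
struct integral_constant
{
   static constexpr T value = v;
   using value_type = T;
   using type       = integral_constant<T, v>;
   constexpr operator value_type() const noexcept { return value; }
   constexpr value_type operator()() const noexcept { return value; }
};

using true_type  = integral_constant<bool, true>;
using false_type = integral_constant<bool, false>;

}} // namespace boost::math
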
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner2_6.hpp b/include/boost/math/tools/detail/rational_horner2_6.hpp index 4555426334..dee9c6e168 100644 --- a/include/boost/math/tools/detail/rational_horner2_6.hpp +++ b/include/boost/math/tools/detail/rational_horner2_6.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_7.hpp b/include/boost/math/tools/detail/rational_horner2_7.hpp
index 6a5c704d1c..6f9a85838c 100644
--- a/include/boost/math/tools/detail/rational_horner2_7.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_7.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_8.hpp b/include/boost/math/tools/detail/rational_horner2_8.hpp
index 9ec861fc5f..33dda23bba 100644
--- a/include/boost/math/tools/detail/rational_horner2_8.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_8.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner2_9.hpp b/include/boost/math/tools/detail/rational_horner2_9.hpp
index c76755cb22..a9025a8900 100644
--- a/include/boost/math/tools/detail/rational_horner2_9.hpp
+++ b/include/boost/math/tools/detail/rational_horner2_9.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -58,7 +58,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -74,7 +74,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -90,7 +90,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -106,7 +106,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_10.hpp b/include/boost/math/tools/detail/rational_horner3_10.hpp
index 773532cd55..b7cec124e2 100644
--- a/include/boost/math/tools/detail/rational_horner3_10.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_10.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_11.hpp b/include/boost/math/tools/detail/rational_horner3_11.hpp
index a712fff090..579f0e4868 100644
--- a/include/boost/math/tools/detail/rational_horner3_11.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_11.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner3_12.hpp b/include/boost/math/tools/detail/rational_horner3_12.hpp index 5b87374abf..54300dd08e 100644 --- a/include/boost/math/tools/detail/rational_horner3_12.hpp +++ b/include/boost/math/tools/detail/rational_horner3_12.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner3_13.hpp b/include/boost/math/tools/detail/rational_horner3_13.hpp index 11591668b8..d2fc7b6331 100644 --- a/include/boost/math/tools/detail/rational_horner3_13.hpp +++ b/include/boost/math/tools/detail/rational_horner3_13.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) 
BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner3_14.hpp b/include/boost/math/tools/detail/rational_horner3_14.hpp index 04f31249d4..0b7675f494 100644 --- a/include/boost/math/tools/detail/rational_horner3_14.hpp +++ b/include/boost/math/tools/detail/rational_horner3_14.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const 
U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner3_15.hpp b/include/boost/math/tools/detail/rational_horner3_15.hpp index 4b9cffd48a..8286caed0b 100644 --- a/include/boost/math/tools/detail/rational_horner3_15.hpp +++ b/include/boost/math/tools/detail/rational_horner3_15.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* 
b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner3_16.hpp b/include/boost/math/tools/detail/rational_horner3_16.hpp index 3a384dcc5f..fc823e4162 100644 --- a/include/boost/math/tools/detail/rational_horner3_16.hpp +++ b/include/boost/math/tools/detail/rational_horner3_16.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* 
b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { diff --git a/include/boost/math/tools/detail/rational_horner3_17.hpp b/include/boost/math/tools/detail/rational_horner3_17.hpp index 1c9435e74a..cf7f75a706 100644 --- a/include/boost/math/tools/detail/rational_horner3_17.hpp +++ b/include/boost/math/tools/detail/rational_horner3_17.hpp @@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{ template -inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(0); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(a[0]) / static_cast(b[0]); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0])); } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* 
b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*) BOOST_MATH_NOEXCEPT(V) { if((-1 <= x) && (x <= 1)) { @@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std:: } template -inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*) BOOST_MATH_NOEXCEPT(V) +BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const 
boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1026,7 +1026,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_18.hpp b/include/boost/math/tools/detail/rational_horner3_18.hpp
index b133e2bafc..f853ed3e0c 100644
--- a/include/boost/math/tools/detail/rational_horner3_18.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_18.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1026,7 +1026,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1160,7 +1160,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_19.hpp b/include/boost/math/tools/detail/rational_horner3_19.hpp
index ca35d3b68f..d44e22c90b 100644
--- a/include/boost/math/tools/detail/rational_horner3_19.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_19.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1026,7 +1026,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1160,7 +1160,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1302,7 +1302,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_2.hpp b/include/boost/math/tools/detail/rational_horner3_2.hpp
index 9c4fe47a74..bb2e2c4dcf 100644
--- a/include/boost/math/tools/detail/rational_horner3_2.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_2.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
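All of the guarded overloads in these generated headers share one numerical trick: inside [-1, 1] the rational is evaluated directly, while for |x| > 1 both polynomials are re-evaluated in z = 1/x with the coefficient order reversed, so large arguments cannot overflow the partial sums. A minimal sketch of that rearrangement for the two-coefficient case (hypothetical helper name, not part of the patch):

// P(x)/Q(x) with P(x) = a[1]*x + a[0] and Q(x) = b[1]*x + b[0].
// For |x| > 1, divide numerator and denominator by x and evaluate in z = 1/x:
// (a[1]*x + a[0]) / (b[1]*x + b[0]) == (a[0]*z + a[1]) / (b[0]*z + b[1]).
template <class T, class V>
V eval_rational2_sketch(const T (&a)[2], const T (&b)[2], const V& x)
{
   if ((-1 <= x) && (x <= 1))
      return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
   V z = 1 / x;
   return static_cast<V>((a[0] * z + a[1]) / (b[0] * z + b[1]));
}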
diff --git a/include/boost/math/tools/detail/rational_horner3_20.hpp b/include/boost/math/tools/detail/rational_horner3_20.hpp
index 58109ac305..967edf0832 100644
--- a/include/boost/math/tools/detail/rational_horner3_20.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_20.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -312,7 +312,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 10>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -390,7 +390,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 11>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -476,7 +476,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 12>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -570,7 +570,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 13>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -672,7 +672,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 14>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -782,7 +782,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 15>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -900,7 +900,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 16>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1026,7 +1026,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 17>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1160,7 +1160,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 18>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1302,7 +1302,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 19>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -1452,7 +1452,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 20>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
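Every signature in these headers gains the same BOOST_MATH_GPU_ENABLED prefix. The macro's real definition lives in the library's config header and is not part of this diff; on CUDA-style compilers an annotation macro of this kind conventionally reduces to the host/device markers, roughly as in this hedged sketch (GPU_ENABLED_SKETCH is a made-up stand-in, not the library's spelling):

// Sketch only: the real macro also has to cover SYCL and NVRTC builds.
#ifdef __CUDACC__
#  define GPU_ENABLED_SKETCH __host__ __device__
#else
#  define GPU_ENABLED_SKETCH
#endif

// Callable from host code everywhere, and from device code under nvcc.
GPU_ENABLED_SKETCH inline double twice(double x) { return 2 * x; }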
diff --git a/include/boost/math/tools/detail/rational_horner3_3.hpp b/include/boost/math/tools/detail/rational_horner3_3.hpp
index d19993cce1..0b410d8bbe 100644
--- a/include/boost/math/tools/detail/rational_horner3_3.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_3.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
diff --git a/include/boost/math/tools/detail/rational_horner3_4.hpp b/include/boost/math/tools/detail/rational_horner3_4.hpp
index 847f26dc4e..07a9a2c5ad 100644
--- a/include/boost/math/tools/detail/rational_horner3_4.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_4.hpp
@@ -12,31 +12,31 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
diff --git a/include/boost/math/tools/detail/rational_horner3_5.hpp b/include/boost/math/tools/detail/rational_horner3_5.hpp
index cc77fd560c..62c76dd506 100644
--- a/include/boost/math/tools/detail/rational_horner3_5.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_5.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_6.hpp b/include/boost/math/tools/detail/rational_horner3_6.hpp
index 73920ad018..f81a068acb 100644
--- a/include/boost/math/tools/detail/rational_horner3_6.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_6.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_7.hpp b/include/boost/math/tools/detail/rational_horner3_7.hpp
index 8e30ecf310..fea457ccf8 100644
--- a/include/boost/math/tools/detail/rational_horner3_7.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_7.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_8.hpp b/include/boost/math/tools/detail/rational_horner3_8.hpp
index a8f93f3a3e..306e2a41d9 100644
--- a/include/boost/math/tools/detail/rational_horner3_8.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_8.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
diff --git a/include/boost/math/tools/detail/rational_horner3_9.hpp b/include/boost/math/tools/detail/rational_horner3_9.hpp
index 064d984d3f..93a3527c18 100644
--- a/include/boost/math/tools/detail/rational_horner3_9.hpp
+++ b/include/boost/math/tools/detail/rational_horner3_9.hpp
@@ -12,37 +12,37 @@ namespace boost{ namespace math{ namespace tools{ namespace detail{
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant<int, 0>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(0);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant<int, 1>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(a[0]) / static_cast<V>(b[0]);
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 2>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((a[1] * x + a[0]) / (b[1] * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 3>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 4>*) BOOST_MATH_NOEXCEPT(V)
 {
    return static_cast<V>((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 5>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -80,7 +80,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 6>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -126,7 +126,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 7>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -180,7 +180,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 8>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
@@ -242,7 +242,7 @@ inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::
 }
 
 template <class T, class U, class V>
-inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant<int, 9>*) BOOST_MATH_NOEXCEPT(V)
 {
    if((-1 <= x) && (x <= 1))
    {
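That closes out the regenerated rational_horner3_* headers. They all implement the same compile-time dispatch: the coefficient count is carried in an integral_constant tag, so the caller resolves to a fully unrolled overload with no runtime loop for a GPU compiler to trip over. A stripped-down sketch of the mechanism (hypothetical names, std:: types for brevity, only the N = 2 and N = 3 cases provided):

#include <iostream>
#include <type_traits>

template <class T, class V>
V eval_imp(const T* a, const T* b, const V& x, const std::integral_constant<int, 2>*)
{
   return (a[1] * x + a[0]) / (b[1] * x + b[0]);   // unrolled, no loop
}

template <class T, class V>
V eval_imp(const T* a, const T* b, const V& x, const std::integral_constant<int, 3>*)
{
   return ((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]);
}

template <class T, class V, int N>
V eval_rational(const T (&a)[N], const T (&b)[N], const V& x)
{
   // the tag pointer is never dereferenced; it exists only to pick the overload
   return eval_imp(a, b, x, static_cast<std::integral_constant<int, N>*>(nullptr));
}

int main()
{
   double a[] = {1.0, 2.0}, b[] = {1.0, 1.0};
   std::cout << eval_rational(a, b, 0.5) << '\n';   // (2*0.5 + 1)/(0.5 + 1) = 4/3
}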
diff --git a/include/boost/math/tools/fraction.hpp b/include/boost/math/tools/fraction.hpp
index a64c070258..f36d024c40 100644
--- a/include/boost/math/tools/fraction.hpp
+++ b/include/boost/math/tools/fraction.hpp
@@ -1,4 +1,5 @@
 //  (C) Copyright John Maddock 2005-2006.
+//  (C) Copyright Matt Borland 2024.
 //  Use, modification and distribution are subject to the
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -10,11 +11,13 @@
 #pragma once
 #endif
 
+#include
+#include
+#include
+#include
 #include
 #include
-#include
-#include
-#include
+#include
 
 namespace boost{ namespace math{ namespace tools{
 
@@ -22,10 +25,10 @@ namespace boost{ namespace math{ namespace tools{
 namespace detail
 {
    template <typename T>
-   struct is_pair : public std::false_type{};
+   struct is_pair : public boost::math::false_type{};
 
    template <typename T, typename U>
-   struct is_pair<std::pair<T, U>> : public std::true_type{};
+   struct is_pair<std::pair<T, U>> : public boost::math::true_type{};
 
    template <typename Gen>
    struct fraction_traits_simple
@@ -33,11 +36,11 @@ namespace detail
       using result_type = typename Gen::result_type;
       using value_type = typename Gen::result_type;
 
-      static result_type a(const value_type&) BOOST_MATH_NOEXCEPT(value_type)
+      BOOST_MATH_GPU_ENABLED static result_type a(const value_type&) BOOST_MATH_NOEXCEPT(value_type)
       {
         return 1;
       }
-      static result_type b(const value_type& v) BOOST_MATH_NOEXCEPT(value_type)
+      BOOST_MATH_GPU_ENABLED static result_type b(const value_type& v) BOOST_MATH_NOEXCEPT(value_type)
       {
         return v;
       }
@@ -49,11 +52,11 @@ namespace detail
       using value_type = typename Gen::result_type;
       using result_type = typename value_type::first_type;
 
-      static result_type a(const value_type& v) BOOST_MATH_NOEXCEPT(value_type)
+      BOOST_MATH_GPU_ENABLED static result_type a(const value_type& v) BOOST_MATH_NOEXCEPT(value_type)
      {
         return v.first;
      }
-      static result_type b(const value_type& v) BOOST_MATH_NOEXCEPT(value_type)
+      BOOST_MATH_GPU_ENABLED static result_type b(const value_type& v) BOOST_MATH_NOEXCEPT(value_type)
      {
         return v.second;
      }
@@ -61,7 +64,7 @@ namespace detail
 
    template <typename Gen>
    struct fraction_traits
-       : public std::conditional<
+       : public boost::math::conditional<
         is_pair<typename Gen::result_type>::value,
         fraction_traits_pair<Gen>,
         fraction_traits_simple<Gen>>::type
@@ -74,7 +77,7 @@ namespace detail
       // For float, double, and long double, 1/min_value() is finite.
      // But for mpfr_float and cpp_bin_float, 1/min_value() is inf.
      // Multiply the min by 16 so that the reciprocal doesn't overflow.
-      static T get() {
+      BOOST_MATH_GPU_ENABLED static T get() {
         return 16*tools::min_value<T>();
      }
    };
@@ -82,13 +85,15 @@ struct tiny_value
    {
      using value_type = typename T::value_type;
-      static T get() {
+      BOOST_MATH_GPU_ENABLED static T get() {
        return 16*tools::min_value();
      }
    };
 } // namespace detail
 
+namespace detail {
+
 //
 // continued_fraction_b
 // Evaluates:
@@ -103,9 +108,15 @@
 //
 // Note that the first a0 returned by generator Gen is discarded.
 //
+
 template <class Gen, class U>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, const U& factor, std::uintmax_t& max_terms)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b_impl(Gen& g, const U& factor, boost::math::uintmax_t& max_terms)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   // SYCL can not handle this condition so we only check float on that platform
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
    BOOST_MATH_STD_USING // ADL of std names
 
@@ -129,7 +140,7 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(G
    C = f;
    D = 0;
 
-   std::uintmax_t counter(max_terms);
+   boost::math::uintmax_t counter(max_terms);
    do{
       v = g();
       D = traits::b(v) + traits::a(v) * D;
@@ -148,17 +159,38 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(G
    return f;
 }
 
+} // namespace detail
+
+template <class Gen, class U>
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, const U& factor, boost::math::uintmax_t& max_terms)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
+{
+   return detail::continued_fraction_b_impl(g, factor, max_terms);
+}
+
 template <class Gen, class U>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, const U& factor)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, const U& factor)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
-   std::uintmax_t max_terms = (std::numeric_limits<std::uintmax_t>::max)();
-   return continued_fraction_b(g, factor, max_terms);
+   boost::math::uintmax_t max_terms = (boost::math::numeric_limits<boost::math::uintmax_t>::max)();
+   return detail::continued_fraction_b_impl(g, factor, max_terms);
 }
 
 template <class Gen>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, int bits)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, int bits)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
    BOOST_MATH_STD_USING // ADL of std names
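The _impl the new public wrappers forward to is the modified Lentz recurrence; the hunks above only show where its counter type changed, so here is a compact standalone sketch of the underlying algorithm (simplified: double only, fixed tiny-value guard, pair-returning generator; lentz_b_sketch is a made-up name):

#include <cmath>
#include <utility>

// g() yields {a_k, b_k}; the first call's b is used as b0 and its a discarded,
// mirroring continued_fraction_b:  b0 + a1/(b1 + a2/(b2 + ...))
template <class Gen>
double lentz_b_sketch(Gen g, double eps, int max_terms = 1000)
{
   std::pair<double, double> v = g();
   double f = (v.second == 0) ? 1e-30 : v.second;   // tiny guard, like tiny_value::get()
   double C = f, D = 0;
   for (int k = 0; k < max_terms; ++k)
   {
      v = g();
      D = v.second + v.first * D;
      if (D == 0) D = 1e-30;
      C = v.second + v.first / C;
      if (C == 0) C = 1e-30;
      D = 1 / D;
      double delta = C * D;
      f *= delta;
      if (std::fabs(delta - 1) < eps)
         break;
   }
   return f;
}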
@@ -166,13 +198,17 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(G
    using result_type = typename traits::result_type;
 
    result_type factor = ldexp(1.0f, 1 - bits); // 1 / pow(result_type(2), bits);
-   std::uintmax_t max_terms = (std::numeric_limits<std::uintmax_t>::max)();
-   return continued_fraction_b(g, factor, max_terms);
+   boost::math::uintmax_t max_terms = (boost::math::numeric_limits<boost::math::uintmax_t>::max)();
+   return detail::continued_fraction_b_impl(g, factor, max_terms);
 }
 
 template <class Gen>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, int bits, std::uintmax_t& max_terms)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(Gen& g, int bits, boost::math::uintmax_t& max_terms)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
    BOOST_MATH_STD_USING // ADL of std names
 
@@ -180,9 +216,11 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(G
    using result_type = typename traits::result_type;
 
    result_type factor = ldexp(1.0f, 1 - bits); // 1 / pow(result_type(2), bits);
-   return continued_fraction_b(g, factor, max_terms);
+   return detail::continued_fraction_b_impl(g, factor, max_terms);
 }
 
+namespace detail {
+
 //
 // continued_fraction_a
 // Evaluates:
@@ -198,8 +236,12 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_b(G
 // Note that the first a1 and b1 returned by generator Gen are both used.
 //
 template <class Gen, class U>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, const U& factor, std::uintmax_t& max_terms)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a_impl(Gen& g, const U& factor, boost::math::uintmax_t& max_terms)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
    BOOST_MATH_STD_USING // ADL of std names
 
@@ -224,7 +266,7 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(G
    C = f;
    D = 0;
 
-   std::uintmax_t counter(max_terms);
+   boost::math::uintmax_t counter(max_terms);
 
    do{
       v = g();
@@ -244,17 +286,38 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(G
    return a0/f;
 }
 
+} // namespace detail
+
+template <class Gen, class U>
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, const U& factor, boost::math::uintmax_t& max_terms)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
+{
+   return detail::continued_fraction_a_impl(g, factor, max_terms);
+}
+
 template <class Gen, class U>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, const U& factor)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, const U& factor)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
-   std::uintmax_t max_iter = (std::numeric_limits<std::uintmax_t>::max)();
-   return continued_fraction_a(g, factor, max_iter);
+   boost::math::uintmax_t max_iter = (boost::math::numeric_limits<boost::math::uintmax_t>::max)();
+   return detail::continued_fraction_a_impl(g, factor, max_iter);
 }
 
 template <class Gen>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, int bits)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, int bits)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
    BOOST_MATH_STD_USING // ADL of std names
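Because the _a form, as the comment above says, uses the first a1 and b1 it is handed, the simplest possible generator, one that returns 1 forever, evaluates 1/(1 + 1/(1 + ...)), which is the golden ratio minus one. A host-side usage sketch against the public API (all_ones is a made-up name):

#include <boost/math/tools/fraction.hpp>
#include <iostream>
#include <limits>

struct all_ones
{
   using result_type = double;
   double operator()() { return 1.0; }   // every a_k and b_k is 1
};

int main()
{
   all_ones gen;
   double frac = boost::math::tools::continued_fraction_a(
       gen, std::numeric_limits<double>::digits);   // the "int bits" overload
   std::cout << 1 + frac << '\n';                   // golden ratio, ~1.6180339887
}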
@@ -262,14 +325,18 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(G
    typedef typename traits::result_type result_type;
 
    result_type factor = ldexp(1.0f, 1-bits); // 1 / pow(result_type(2), bits);
-   std::uintmax_t max_iter = (std::numeric_limits<std::uintmax_t>::max)();
+   boost::math::uintmax_t max_iter = (boost::math::numeric_limits<boost::math::uintmax_t>::max)();
 
-   return continued_fraction_a(g, factor, max_iter);
+   return detail::continued_fraction_a_impl(g, factor, max_iter);
 }
 
 template <class Gen>
-inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, int bits, std::uintmax_t& max_terms)
-   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type) && noexcept(std::declval<Gen>()()))
+BOOST_MATH_GPU_ENABLED inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(Gen& g, int bits, boost::math::uintmax_t& max_terms)
+   noexcept(BOOST_MATH_IS_FLOAT(typename detail::fraction_traits<Gen>::result_type)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<Gen>()())
+   #endif
+   )
 {
    BOOST_MATH_STD_USING // ADL of std names
 
@@ -277,7 +344,7 @@ inline typename detail::fraction_traits<Gen>::result_type continued_fraction_a(G
    using result_type = typename traits::result_type;
 
    result_type factor = ldexp(1.0f, 1-bits); // 1 / pow(result_type(2), bits);
-   return continued_fraction_a(g, factor, max_terms);
+   return detail::continued_fraction_a_impl(g, factor, max_terms);
 }
 
 } // namespace tools
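That completes fraction.hpp. Host-side usage is unchanged by the refactor; the sketch below, a close relative of the library documentation's classic tan(x) fraction, exercises the fraction_traits_pair path shown earlier (tan_sketch is a made-up wrapper name):

#include <boost/math/tools/fraction.hpp>
#include <cmath>
#include <iostream>
#include <limits>
#include <utility>

// tan(x) = x / (1 - x^2/(3 - x^2/(5 - ...)))
template <class T>
struct tan_fraction
{
   using result_type = std::pair<T, T>;
   T a, b;
   explicit tan_fraction(T v) : a(-v * v), b(-1) {}
   result_type operator()() { b += 2; return std::make_pair(a, b); }
};

template <class T>
T tan_sketch(T x)
{
   tan_fraction<T> fract(x);
   return x / boost::math::tools::continued_fraction_b(
                  fract, std::numeric_limits<T>::epsilon());
}

int main()
{
   std::cout << tan_sketch(1.0) << " vs " << std::tan(1.0) << '\n';
}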
diff --git a/include/boost/math/tools/is_detected.hpp b/include/boost/math/tools/is_detected.hpp
index 8dfe86b740..93fa96f60b 100644
--- a/include/boost/math/tools/is_detected.hpp
+++ b/include/boost/math/tools/is_detected.hpp
@@ -8,7 +8,7 @@
 #ifndef BOOST_MATH_TOOLS_IS_DETECTED_HPP
 #define BOOST_MATH_TOOLS_IS_DETECTED_HPP
 
-#include
+#include
 
 namespace boost { namespace math { namespace tools {
 
@@ -20,14 +20,14 @@ namespace detail {
 template <typename Default, typename AlwaysVoid, template <typename...> class Op, typename... Args>
 struct detector
 {
-   using value_t = std::false_type;
+   using value_t = boost::math::false_type;
    using type = Default;
 };
 
 template <typename Default, template <typename...> class Op, typename... Args>
 struct detector<Default, void_t<Op<Args...>>, Op, Args...>
 {
-   using value_t = std::true_type;
+   using value_t = boost::math::true_type;
    using type = Op<Args...>;
 };
 
diff --git a/include/boost/math/tools/minima.hpp b/include/boost/math/tools/minima.hpp
index 6070fc5307..a6be94cb2b 100644
--- a/include/boost/math/tools/minima.hpp
+++ b/include/boost/math/tools/minima.hpp
@@ -11,20 +11,26 @@
 #pragma once
 #endif
 
-#include
-#include
-#include
+#include
+#include
+#include
+#include
 #include
+#include
 #include
 
 namespace boost{ namespace math{ namespace tools{
 
 template <class F, class T>
-std::pair<T, T> brent_find_minima(F f, T min, T max, int bits, std::uintmax_t& max_iter)
-   noexcept(BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval<F>()(std::declval<T>())))
+BOOST_MATH_GPU_ENABLED boost::math::pair<T, T> brent_find_minima(F f, T min, T max, int bits, boost::math::uintmax_t& max_iter)
+   noexcept(BOOST_MATH_IS_FLOAT(T)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<F>()(std::declval<T>()))
+   #endif
+   )
 {
    BOOST_MATH_STD_USING
-   bits = (std::min)(policies::digits<T, policies::policy<> >() / 2, bits);
+   bits = (boost::math::min)(policies::digits<T, policies::policy<> >() / 2, bits);
    T tolerance = static_cast<T>(ldexp(1.0, 1-bits));
    T x;  // minima so far
    T w;  // second best point
@@ -42,7 +48,7 @@ std::pair<T, T> brent_find_minima(F f, T min, T max, int bits, std::uintmax_t& m
    fw = fv = fx = f(x);
    delta2 = delta = 0;
 
-   uintmax_t count = max_iter;
+   boost::math::uintmax_t count = max_iter;
 
    do{
       // get midpoint
@@ -134,14 +140,18 @@ std::pair<T, T> brent_find_minima(F f, T min, T max, int bits, std::uintmax_t& m
 
    max_iter -= count;
 
-   return std::make_pair(x, fx);
+   return boost::math::make_pair(x, fx);
 }
 
 template <class F, class T>
-inline std::pair<T, T> brent_find_minima(F f, T min, T max, int digits)
-   noexcept(BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval<F>()(std::declval<T>())))
+BOOST_MATH_GPU_ENABLED inline boost::math::pair<T, T> brent_find_minima(F f, T min, T max, int digits)
+   noexcept(BOOST_MATH_IS_FLOAT(T)
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+   && noexcept(std::declval<F>()(std::declval<T>()))
+   #endif
+   )
 {
-   std::uintmax_t m = (std::numeric_limits<std::uintmax_t>::max)();
+   boost::math::uintmax_t m = (boost::math::numeric_limits<boost::math::uintmax_t>::max)();
    return brent_find_minima(f, min, max, digits, m);
 }
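Apart from the pair and uintmax_t spellings, which in host builds can be assumed (hedged) to alias the corresponding std types, brent_find_minima keeps its interface. Typical host-side usage; on return max_iter holds the number of iterations actually consumed:

#include <boost/math/tools/minima.hpp>
#include <cstdint>
#include <iostream>

int main()
{
   auto f = [](double x) { return (x - 2) * (x - 2) + 1; };   // minimum at x = 2
   std::uintmax_t max_iter = 50;
   auto r = boost::math::tools::brent_find_minima(f, 0.0, 4.0, 24, max_iter);
   std::cout << "x = " << r.first << ", f(x) = " << r.second
             << ", iterations = " << max_iter << '\n';
}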
diff --git a/include/boost/math/tools/mp.hpp b/include/boost/math/tools/mp.hpp
index 55aac1b092..560ae8b500 100644
--- a/include/boost/math/tools/mp.hpp
+++ b/include/boost/math/tools/mp.hpp
@@ -11,9 +11,9 @@
 #ifndef BOOST_MATH_TOOLS_MP
 #define BOOST_MATH_TOOLS_MP
 
-#include
-#include
-#include
+#include
+#include
+#include
 
 namespace boost { namespace math { namespace tools { namespace meta_programming {
 
@@ -23,12 +23,12 @@ namespace boost { namespace math { namespace tools { namespace meta_programming {
 template <typename... T>
 struct mp_list {};
 
 // Size_t
-template <std::size_t N>
-using mp_size_t = std::integral_constant<std::size_t, N>;
+template <boost::math::size_t N>
+using mp_size_t = boost::math::integral_constant<boost::math::size_t, N>;
 
 // Boolean
 template <bool B>
-using mp_bool = std::integral_constant<bool, B>;
+using mp_bool = boost::math::integral_constant<bool, B>;
 
 // Identity
 template <typename T>
@@ -53,7 +53,7 @@ struct mp_size_impl {};
 template <template <typename...> class L, typename... T> // Template template parameter must use class
 struct mp_size_impl<L<T...>>
 {
-   using type = std::integral_constant<std::size_t, sizeof...(T)>;
+   using type = boost::math::integral_constant<boost::math::size_t, sizeof...(T)>;
 };
 
 }
 
@@ -79,7 +79,7 @@ namespace detail {
 // At
 // TODO - Use tree based lookup for larger typelists
 // http://odinthenerd.blogspot.com/2017/04/tree-based-lookup-why-kvasirmpl-is.html
-template <typename L, std::size_t>
+template <typename L, boost::math::size_t>
 struct mp_at_c {};
 
 template <template <typename...> class L, typename T0, typename... T>
@@ -168,7 +168,7 @@ struct mp_at_c
 };
 
 }
 
-template <typename L, std::size_t Index>
+template <typename L, boost::math::size_t Index>
 using mp_at_c = typename detail::mp_at_c<L, Index>::type;
 
 template
@@ -336,25 +336,11 @@ using mp_remove_if = typename detail::mp_remove_if_impl<L, P>::type;
 template <typename L, template <typename...> class P>
 using mp_remove_if_q = mp_remove_if<L, P>;
 
-// Index sequence
-// Use C++14 index sequence if available
-#if defined(__cpp_lib_integer_sequence) && (__cpp_lib_integer_sequence >= 201304)
-template <std::size_t... I>
-using index_sequence = std::index_sequence<I...>;
-
-template <std::size_t N>
-using make_index_sequence = std::make_index_sequence<N>;
-
-template <typename... T>
-using index_sequence_for = std::index_sequence_for<T...>;
-
-#else
-
 template <typename T, T... I>
 struct integer_sequence {};
 
-template <std::size_t... I>
-using index_sequence = integer_sequence<std::size_t, I...>;
+template <boost::math::size_t... I>
+using index_sequence = integer_sequence<boost::math::size_t, I...>;
 
 namespace detail {
 
@@ -426,13 +412,11 @@ struct make_integer_sequence_impl
 
 template <typename T, T N>
 using make_integer_sequence = typename detail::make_integer_sequence_impl<T, N>::type;
 
-template <std::size_t N>
-using make_index_sequence = make_integer_sequence<std::size_t, N>;
+template <boost::math::size_t N>
+using make_index_sequence = make_integer_sequence<boost::math::size_t, N>;
 
 template <typename... T>
-using index_sequence_for = make_integer_sequence<std::size_t, sizeof...(T)>;
-
-#endif
+using index_sequence_for = make_integer_sequence<boost::math::size_t, sizeof...(T)>;
 
 }}}} // namespaces
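With the removed #if block, the library now always supplies its own integer_sequence rather than conditionally aliasing the C++14 std one; behaviour is a drop-in match. A small sketch of the pack expansion it drives (assuming, as elsewhere in this patch, that boost::math::size_t stands in for std::size_t; iota_array is a made-up name):

#include <boost/math/tools/mp.hpp>
#include <array>
#include <cstddef>
#include <iostream>

namespace mp = boost::math::tools::meta_programming;

template <std::size_t... I>
std::array<std::size_t, sizeof...(I)> iota_array(mp::index_sequence<I...>)
{
   return {{ I... }};   // expands to {0, 1, 2, ...}
}

int main()
{
   for (std::size_t v : iota_array(mp::make_index_sequence<5>{}))
      std::cout << v << ' ';   // prints: 0 1 2 3 4
}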
diff --git a/include/boost/math/tools/numeric_limits.hpp b/include/boost/math/tools/numeric_limits.hpp
new file mode 100644
index 0000000000..87a7802363
--- /dev/null
+++ b/include/boost/math/tools/numeric_limits.hpp
@@ -0,0 +1,888 @@
+// Copyright (c) 2024 Matt Borland
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Regular use of std::numeric_limits functions can not be used on
+// GPU platforms like CUDA since they are missing the __device__ marker
+// and libcu++ does not provide something analogous.
+// Rather than using giant if else blocks make our own version of numeric limits
+//
+// On the CUDA NVRTC platform we use a best attempt at emulating the functions
+// and values since we do not have any macros to go off of.
+// Use the values as found on GCC 11.4 RHEL 9.4 x64
+
+#ifndef BOOST_MATH_TOOLS_NUMERIC_LIMITS_HPP
+#define BOOST_MATH_TOOLS_NUMERIC_LIMITS_HPP
+
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+
+#include <limits>
+#include <climits>
+#include <cfloat>
+#include <cmath>
+
+#endif
+
+namespace boost {
+namespace math {
+
+template <typename T>
+struct numeric_limits
+#ifndef BOOST_MATH_HAS_NVRTC
+: public std::numeric_limits<T> {};
+#else
+{};
+#endif
+
+#if defined(BOOST_MATH_HAS_GPU_SUPPORT) && !defined(BOOST_MATH_HAS_NVRTC)
+
+template <>
+struct numeric_limits<float>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<float>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<float>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<float>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<float>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<float>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<float>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<float>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<float>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<float>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<float>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<float>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<float>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<float>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<float>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<float>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<float>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<float>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<float>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<float>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<float>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<float>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float (min) () { return FLT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float (max) () { return FLT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float lowest () { return -FLT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float epsilon () { return FLT_EPSILON; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float round_error () { return 0.5F; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float infinity () { return static_cast<float>(INFINITY); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float quiet_NaN () { return static_cast<float>(NAN); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float signaling_NaN ()
+    {
+        #ifdef FLT_SNAN
+        return FLT_SNAN;
+        #else
+        return static_cast<float>(NAN);
+        #endif
+    }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float denorm_min () { return FLT_TRUE_MIN; }
+};
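Reviewer note (not from the PR): the point of these specializations is that every member function carries BOOST_MATH_GPU_ENABLED, so the same expression is legal in host and device code. A sketch of the intended call pattern; clamp_to_finite is a hypothetical helper, not part of the header:

#include <boost/math/tools/numeric_limits.hpp>

template <typename T>
BOOST_MATH_GPU_ENABLED T clamp_to_finite(T x)
{
    // (max)() and (min)() are parenthesized throughout, dodging min/max macros
    if (x > (boost::math::numeric_limits<T>::max)())
        return (boost::math::numeric_limits<T>::max)();
    if (x < boost::math::numeric_limits<T>::lowest())
        return boost::math::numeric_limits<T>::lowest();
    return x;
}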
+
+template <>
+struct numeric_limits<double>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<double>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<double>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<double>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<double>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<double>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<double>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<double>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<double>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<double>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<double>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<double>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<double>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<double>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<double>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<double>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<double>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<double>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<double>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<double>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<double>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<double>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double (min) () { return DBL_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double (max) () { return DBL_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double lowest () { return -DBL_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double epsilon () { return DBL_EPSILON; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double round_error () { return 0.5; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double infinity () { return static_cast<double>(INFINITY); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double quiet_NaN () { return static_cast<double>(NAN); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double signaling_NaN ()
+    {
+        #ifdef DBL_SNAN
+        return DBL_SNAN;
+        #else
+        return static_cast<double>(NAN);
+        #endif
+    }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double denorm_min () { return DBL_TRUE_MIN; }
+};
+
+template <>
+struct numeric_limits<short>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<short>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<short>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<short>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<short>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<short>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<short>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<short>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<short>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<short>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<short>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<short>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<short>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<short>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<short>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<short>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<short>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<short>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<short>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<short>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<short>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<short>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short (min) () { return SHRT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short (max) () { return SHRT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short lowest () { return SHRT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned short>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned short>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<unsigned short>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<unsigned short>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<unsigned short>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned short>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned short>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned short>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned short>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned short>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned short>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned short>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<unsigned short>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<unsigned short>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned short>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<unsigned short>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<unsigned short>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned short>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<unsigned short>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned short>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<unsigned short>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned short>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short (max) () { return USHRT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<int>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<int>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<int>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<int>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<int>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<int>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<int>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<int>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<int>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<int>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<int>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<int>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<int>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<int>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<int>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<int>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<int>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<int>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<int>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<int>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<int>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<int>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int (min) () { return INT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int (max) () { return INT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int lowest () { return INT_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned int>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned int>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<unsigned int>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<unsigned int>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<unsigned int>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned int>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned int>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned int>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned int>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned int>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned int>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned int>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<unsigned int>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<unsigned int>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned int>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<unsigned int>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<unsigned int>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned int>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<unsigned int>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned int>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<unsigned int>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned int>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int (max) () { return UINT_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<long>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<long>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<long>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<long>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<long>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<long>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<long>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<long>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<long>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<long>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<long>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<long>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<long>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<long>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<long>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<long>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<long>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<long>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<long>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<long>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<long>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long (min) () { return LONG_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long (max) () { return LONG_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long lowest () { return LONG_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned long>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<unsigned long>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<unsigned long>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<unsigned long>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned long>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned long>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned long>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned long>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned long>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned long>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned long>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<unsigned long>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<unsigned long>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned long>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<unsigned long>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<unsigned long>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned long>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<unsigned long>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned long>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<unsigned long>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned long>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long (max) () { return ULONG_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<long long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<long long>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<long long>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<long long>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<long long>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<long long>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<long long>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<long long>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<long long>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<long long>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<long long>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<long long>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<long long>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<long long>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<long long>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<long long>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<long long>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<long long>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<long long>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<long long>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<long long>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<long long>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long (min) () { return LLONG_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long (max) () { return LLONG_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long lowest () { return LLONG_MIN; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned long long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<unsigned long long>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<unsigned long long>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<unsigned long long>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<unsigned long long>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<unsigned long long>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<unsigned long long>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<unsigned long long>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<unsigned long long>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<unsigned long long>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<unsigned long long>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<unsigned long long>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<unsigned long long>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<unsigned long long>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<unsigned long long>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<unsigned long long>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<unsigned long long>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<unsigned long long>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<unsigned long long>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<unsigned long long>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<unsigned long long>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<unsigned long long>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long (max) () { return ULLONG_MAX; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<bool>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = std::numeric_limits<bool>::is_specialized;
+    BOOST_MATH_STATIC constexpr bool is_signed = std::numeric_limits<bool>::is_signed;
+    BOOST_MATH_STATIC constexpr bool is_integer = std::numeric_limits<bool>::is_integer;
+    BOOST_MATH_STATIC constexpr bool is_exact = std::numeric_limits<bool>::is_exact;
+    BOOST_MATH_STATIC constexpr bool has_infinity = std::numeric_limits<bool>::has_infinity;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = std::numeric_limits<bool>::has_quiet_NaN;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = std::numeric_limits<bool>::has_signaling_NaN;
+
+    BOOST_MATH_STATIC constexpr std::float_round_style round_style = std::numeric_limits<bool>::round_style;
+    BOOST_MATH_STATIC constexpr bool is_iec559 = std::numeric_limits<bool>::is_iec559;
+    BOOST_MATH_STATIC constexpr bool is_bounded = std::numeric_limits<bool>::is_bounded;
+    BOOST_MATH_STATIC constexpr bool is_modulo = std::numeric_limits<bool>::is_modulo;
+    BOOST_MATH_STATIC constexpr int digits = std::numeric_limits<bool>::digits;
+    BOOST_MATH_STATIC constexpr int digits10 = std::numeric_limits<bool>::digits10;
+    BOOST_MATH_STATIC constexpr int max_digits10 = std::numeric_limits<bool>::max_digits10;
+    BOOST_MATH_STATIC constexpr int radix = std::numeric_limits<bool>::radix;
+    BOOST_MATH_STATIC constexpr int min_exponent = std::numeric_limits<bool>::min_exponent;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = std::numeric_limits<bool>::min_exponent10;
+    BOOST_MATH_STATIC constexpr int max_exponent = std::numeric_limits<bool>::max_exponent;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = std::numeric_limits<bool>::max_exponent10;
+    BOOST_MATH_STATIC constexpr bool traps = std::numeric_limits<bool>::traps;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = std::numeric_limits<bool>::tinyness_before;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool (min) () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool (max) () { return true; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool lowest () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool epsilon () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool round_error () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool infinity () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool quiet_NaN () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool signaling_NaN () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool denorm_min () { return false; }
+};
+
+#elif defined(BOOST_MATH_HAS_NVRTC) // Pure NVRTC support - Removes rounding style and approximates the traits
+
+template <>
+struct numeric_limits<float>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = true;
+    BOOST_MATH_STATIC constexpr bool is_integer = false;
+    BOOST_MATH_STATIC constexpr bool is_exact = false;
+    BOOST_MATH_STATIC constexpr bool has_infinity = true;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = true;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = true;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = true;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 24;
+    BOOST_MATH_STATIC constexpr int digits10 = 6;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 9;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = -125;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = -37;
+    BOOST_MATH_STATIC constexpr int max_exponent = 128;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 38;
+    BOOST_MATH_STATIC constexpr bool traps = false;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float (min) () { return 1.17549435e-38F; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float (max) () { return 3.40282347e+38F; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float lowest () { return -3.40282347e+38F; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float epsilon () { return 1.1920929e-07; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float round_error () { return 0.5F; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float infinity () { return __int_as_float(0x7f800000); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float quiet_NaN () { return __int_as_float(0x7fc00000); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float signaling_NaN () { return __int_as_float(0x7fa00000); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr float denorm_min () { return 1.4013e-45F; }
+};
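Reviewer note (not from the PR): the hex constants above are the standard IEEE-754 single-precision bit patterns, decoded through CUDA's __int_as_float intrinsic because NVRTC provides no INFINITY/NAN macros. A host-side check of what they encode, using a memcpy bit-cast (a hypothetical test, not something the header does):

#include <cmath>
#include <cstdint>
#include <cstring>

static float bits_to_float(std::uint32_t u)
{
    float f;
    std::memcpy(&f, &u, sizeof f); // well-defined bit reinterpretation
    return f;
}

int main()
{
    // 0x7f800000: sign 0, exponent all-ones, mantissa 0     -> +infinity
    // 0x7fc00000: exponent all-ones, quiet bit set          -> quiet NaN
    // 0x7fa00000: exponent all-ones, quiet bit clear,
    //             non-zero payload                          -> signaling NaN
    return (std::isinf(bits_to_float(0x7f800000u)) &&
            std::isnan(bits_to_float(0x7fc00000u)) &&
            std::isnan(bits_to_float(0x7fa00000u))) ? 0 : 1;
}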
+
+template <>
+struct numeric_limits<double>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = true;
+    BOOST_MATH_STATIC constexpr bool is_integer = false;
+    BOOST_MATH_STATIC constexpr bool is_exact = false;
+    BOOST_MATH_STATIC constexpr bool has_infinity = true;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = true;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = true;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = true;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 53;
+    BOOST_MATH_STATIC constexpr int digits10 = 15;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 21;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = -1021;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = -307;
+    BOOST_MATH_STATIC constexpr int max_exponent = 1024;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 308;
+    BOOST_MATH_STATIC constexpr bool traps = false;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double (min) () { return 2.2250738585072014e-308; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double (max) () { return 1.7976931348623157e+308; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double lowest () { return -1.7976931348623157e+308; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double epsilon () { return 2.2204460492503131e-16; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double round_error () { return 0.5; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double infinity () { return __longlong_as_double(0x7ff0000000000000ULL); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double quiet_NaN () { return __longlong_as_double(0x7ff8000000000000ULL); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double signaling_NaN () { return __longlong_as_double(0x7ff4000000000000ULL); }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr double denorm_min () { return 4.9406564584124654e-324; }
+};
+
+template <>
+struct numeric_limits<short>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = true;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 15;
+    BOOST_MATH_STATIC constexpr int digits10 = 4;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short (min) () { return -32768; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short (max) () { return 32767; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short lowest () { return -32768; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr short denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned short>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = false;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = true;
+    BOOST_MATH_STATIC constexpr int digits = 16;
+    BOOST_MATH_STATIC constexpr int digits10 = 4;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short (max) () { return 65535U; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned short denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<int>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = true;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 31;
+    BOOST_MATH_STATIC constexpr int digits10 = 9;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int (min) () { return -2147483648; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int (max) () { return 2147483647; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int lowest () { return -2147483648; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr int denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned int>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = false;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = true;
+    BOOST_MATH_STATIC constexpr int digits = 32;
+    BOOST_MATH_STATIC constexpr int digits10 = 9;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int (max) () { return 4294967295U; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned int denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = true;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 63;
+    BOOST_MATH_STATIC constexpr int digits10 = 18;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long (min) () { return -9223372036854775808L; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long (max) () { return 9223372036854775807L; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long lowest () { return -9223372036854775808L; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = false;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = true;
+    BOOST_MATH_STATIC constexpr int digits = 64;
+    BOOST_MATH_STATIC constexpr int digits10 = 19;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long (max) () { return 18446744073709551615UL; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<long long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = true;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 63;
+    BOOST_MATH_STATIC constexpr int digits10 = 18;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long (min) () { return -9223372036854775808LL; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long (max) () { return 9223372036854775807LL; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long lowest () { return -9223372036854775808LL; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr long long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<unsigned long long>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = false;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = true;
+    BOOST_MATH_STATIC constexpr int digits = 64;
+    BOOST_MATH_STATIC constexpr int digits10 = 19;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = true;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long (min) () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long (max) () { return 18446744073709551615UL; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long lowest () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long epsilon () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long round_error () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long infinity () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long quiet_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long signaling_NaN () { return 0; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr unsigned long long denorm_min () { return 0; }
+};
+
+template <>
+struct numeric_limits<bool>
+{
+    BOOST_MATH_STATIC constexpr bool is_specialized = true;
+    BOOST_MATH_STATIC constexpr bool is_signed = false;
+    BOOST_MATH_STATIC constexpr bool is_integer = true;
+    BOOST_MATH_STATIC constexpr bool is_exact = true;
+    BOOST_MATH_STATIC constexpr bool has_infinity = false;
+    BOOST_MATH_STATIC constexpr bool has_quiet_NaN = false;
+    BOOST_MATH_STATIC constexpr bool has_signaling_NaN = false;
+
+    BOOST_MATH_STATIC constexpr bool is_iec559 = false;
+    BOOST_MATH_STATIC constexpr bool is_bounded = true;
+    BOOST_MATH_STATIC constexpr bool is_modulo = false;
+    BOOST_MATH_STATIC constexpr int digits = 1;
+    BOOST_MATH_STATIC constexpr int digits10 = 0;
+    BOOST_MATH_STATIC constexpr int max_digits10 = 0;
+    BOOST_MATH_STATIC constexpr int radix = 2;
+    BOOST_MATH_STATIC constexpr int min_exponent = 0;
+    BOOST_MATH_STATIC constexpr int min_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent = 0;
+    BOOST_MATH_STATIC constexpr int max_exponent10 = 0;
+    BOOST_MATH_STATIC constexpr bool traps = false;
+    BOOST_MATH_STATIC constexpr bool tinyness_before = false;
+
+    // Member Functions
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool (min) () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool (max) () { return true; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool lowest () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool epsilon () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool round_error () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool infinity () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool quiet_NaN () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool signaling_NaN () { return false; }
+    BOOST_MATH_GPU_ENABLED BOOST_MATH_STATIC constexpr bool denorm_min () { return false; }
+};
+
+#endif // BOOST_MATH_HAS_GPU_SUPPORT
+
+} // namespace math
+} // namespace boost
+
+#endif
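Reviewer note (not from the PR): since the non-NVRTC branches delegate every trait to std::numeric_limits, a host-only parity check like the following (a plausible unit test, not one included in this diff) pins the delegation down:

#include <boost/math/tools/numeric_limits.hpp>
#include <limits>

static_assert(boost::math::numeric_limits<double>::digits == std::numeric_limits<double>::digits,
              "trait delegation must be exact on host");
static_assert(boost::math::numeric_limits<int>::is_integer == std::numeric_limits<int>::is_integer,
              "trait delegation must be exact on host");
static_assert((boost::math::numeric_limits<float>::max)() == (std::numeric_limits<float>::max)(),
              "member functions must return the std values on host");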
diff --git a/include/boost/math/tools/polynomial.hpp b/include/boost/math/tools/polynomial.hpp
index 6f9b9039fd..5d395cdbed 100644
--- a/include/boost/math/tools/polynomial.hpp
+++ b/include/boost/math/tools/polynomial.hpp
@@ -32,7 +32,7 @@ namespace boost{ namespace math{ namespace tools{
 
 template <class T>
-T chebyshev_coefficient(unsigned n, unsigned m)
+BOOST_MATH_GPU_ENABLED T chebyshev_coefficient(unsigned n, unsigned m)
 {
    BOOST_MATH_STD_USING
    if(m > n)
@@ -56,7 +56,7 @@ T chebyshev_coefficient(unsigned n, unsigned m)
 }
 
 template <class Seq>
-Seq polynomial_to_chebyshev(const Seq& s)
+BOOST_MATH_GPU_ENABLED Seq polynomial_to_chebyshev(const Seq& s)
 {
    // Converts a Polynomial into Chebyshev form:
    typedef typename Seq::value_type value_type;
@@ -92,7 +92,7 @@ Seq polynomial_to_chebyshev(const Seq& s)
 }
 
 template <class Seq, class T>
-T evaluate_chebyshev(const Seq& a, const T& x)
+BOOST_MATH_GPU_ENABLED T evaluate_chebyshev(const Seq& a, const T& x)
 {
    // Clenshaw's formula:
    typedef typename Seq::difference_type difference_type;
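Reviewer note (not from the PR): for reference, the Clenshaw recurrence named in the comment above evaluates p(x) = Σ a_k T_k(x) without constructing the Chebyshev polynomials T_k explicitly:

    b_{n+1} = b_{n+2} = 0
    b_k = a_k + 2x b_{k+1} - b_{k+2},   k = n, ..., 1
    p(x) = a_0 + x b_1 - b_2            (a_0/2 + x b_1 - b_2 under the halved-a_0 convention)

Whether the constant term enters as a_0 or a_0/2 depends on the series normalization, which polynomial_to_chebyshev above fixes; the recurrence itself is unchanged by this PR, which only adds the GPU markers.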
@@ -124,7 +124,7 @@ namespace detail {
 * subtlety of distinction.
 */
 template <typename T, typename N>
-typename std::enable_if<!std::numeric_limits<T>::is_integer, void >::type
+BOOST_MATH_GPU_ENABLED typename std::enable_if<!std::numeric_limits<T>::is_integer, void >::type
 division_impl(polynomial<T> &q, polynomial<T> &u, const polynomial<T>& v, N n, N k)
 {
    q[k] = u[n + k] / v[n];
@@ -136,7 +136,7 @@ division_impl(polynomial<T> &q, polynomial<T> &u, const polynomial<T>& v, N n, N
 }
 
 template <class T, class N>
-T integer_power(T t, N n)
+BOOST_MATH_GPU_ENABLED T integer_power(T t, N n)
 {
    switch(n)
    {
@@ -167,7 +167,7 @@ T integer_power(T t, N n)
 * don't currently have that subtlety of distinction.
 */
 template <typename T, typename N>
-typename std::enable_if<std::numeric_limits<T>::is_integer, void >::type
+BOOST_MATH_GPU_ENABLED typename std::enable_if<std::numeric_limits<T>::is_integer, void >::type
 division_impl(polynomial<T> &q, polynomial<T> &u, const polynomial<T>& v, N n, N k)
 {
    q[k] = u[n + k] * integer_power(v[n], k);
@@ -187,7 +187,7 @@ division_impl(polynomial<T> &q, polynomial<T> &u, const polynomial<T>& v, N n, N
 * @param v Divisor.
 */
 template <typename T>
-std::pair< polynomial<T>, polynomial<T> >
+BOOST_MATH_GPU_ENABLED std::pair< polynomial<T>, polynomial<T> >
 division(polynomial<T> u, const polynomial<T>& v)
 {
    BOOST_MATH_ASSERT(v.size() <= u.size());
@@ -218,7 +218,7 @@ division(polynomial<T> u, const polynomial<T>& v)
 struct negate
 {
    template <class T>
-   T operator()(T const &x) const
+   BOOST_MATH_GPU_ENABLED T operator()(T const &x) const
    {
       return -x;
    }
@@ -227,7 +227,7 @@ struct negate
 struct plus
 {
    template <class T, class U>
-   T operator()(T const &x, U const& y) const
+   BOOST_MATH_GPU_ENABLED T operator()(T const &x, U const& y) const
    {
       return x + y;
    }
@@ -236,7 +236,7 @@ struct plus
 struct minus
 {
    template <class T, class U>
-   T operator()(T const &x, U const& y) const
+   BOOST_MATH_GPU_ENABLED T operator()(T const &x, U const& y) const
    {
      return x - y;
    }
@@ -248,13 +248,13 @@ struct minus
 * Returns the zero element for multiplication of polynomials.
 */
 template <class T>
-polynomial<T> zero_element(std::multiplies< polynomial<T> >)
+BOOST_MATH_GPU_ENABLED polynomial<T> zero_element(std::multiplies< polynomial<T> >)
 {
    return polynomial<T>();
 }
 
 template <class T>
-polynomial<T> identity_element(std::multiplies< polynomial<T> >)
+BOOST_MATH_GPU_ENABLED polynomial<T> identity_element(std::multiplies< polynomial<T> >)
 {
    return polynomial<T>(T(1));
 }
@@ -264,7 +264,7 @@ polynomial<T> identity_element(std::multiplies< polynomial<T> >)
 * This function is not defined for division by zero: user beware.
 */
 template <typename T>
-std::pair< polynomial<T>, polynomial<T> >
+BOOST_MATH_GPU_ENABLED std::pair< polynomial<T>, polynomial<T> >
 quotient_remainder(const polynomial<T>& dividend, const polynomial<T>& divisor)
 {
    BOOST_MATH_ASSERT(divisor);
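Reviewer note (not from the PR): quotient_remainder is ordinary Euclidean division of polynomials; a host-side sketch (coefficients are stored lowest degree first):

#include <boost/math/tools/polynomial.hpp>

int main()
{
    using boost::math::tools::polynomial;
    polynomial<double> dividend{{-1.0, 0.0, 1.0}}; // x^2 - 1
    polynomial<double> divisor{{-1.0, 1.0}};       // x - 1
    // found by ADL; returns (quotient, remainder)
    auto qr = quotient_remainder(dividend, divisor);
    // (x^2 - 1) / (x - 1) = x + 1 exactly, so the remainder is the zero polynomial
    return (qr.first == polynomial<double>{{1.0, 1.0}} && qr.second.is_zero()) ? 0 : 1;
}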
boost::math::tools::evaluate_polynomial((m_data).data(), z, m_data.size()) : T(0); } - std::vector chebyshev() const + BOOST_MATH_GPU_ENABLED std::vector chebyshev() const { return polynomial_to_chebyshev(m_data); } - std::vector const& data() const + BOOST_MATH_GPU_ENABLED std::vector const& data() const { return m_data; } - std::vector & data() + BOOST_MATH_GPU_ENABLED std::vector & data() { return m_data; } - polynomial prime() const + BOOST_MATH_GPU_ENABLED polynomial prime() const { #ifdef _MSC_VER // Disable int->float conversion warning: @@ -418,7 +418,7 @@ class polynomial #endif } - polynomial integrate() const + BOOST_MATH_GPU_ENABLED polynomial integrate() const { std::vector i_data(m_data.size() + 1); // Choose integration constant such that P(0) = 0. @@ -431,20 +431,20 @@ class polynomial } // operators: - polynomial& operator =(polynomial&& p) noexcept + BOOST_MATH_GPU_ENABLED polynomial& operator =(polynomial&& p) noexcept { m_data = std::move(p.m_data); return *this; } - polynomial& operator =(const polynomial& p) + BOOST_MATH_GPU_ENABLED polynomial& operator =(const polynomial& p) { m_data = p.m_data; return *this; } template - typename std::enable_if::value, polynomial&>::type operator +=(const U& value) + BOOST_MATH_GPU_ENABLED typename std::enable_if::value, polynomial&>::type operator +=(const U& value) { addition(value); normalize(); @@ -452,7 +452,7 @@ class polynomial } template - typename std::enable_if::value, polynomial&>::type operator -=(const U& value) + BOOST_MATH_GPU_ENABLED typename std::enable_if::value, polynomial&>::type operator -=(const U& value) { subtraction(value); normalize(); @@ -460,7 +460,7 @@ class polynomial } template - typename std::enable_if::value, polynomial&>::type operator *=(const U& value) + BOOST_MATH_GPU_ENABLED typename std::enable_if::value, polynomial&>::type operator *=(const U& value) { multiplication(value); normalize(); @@ -468,7 +468,7 @@ class polynomial } template - typename std::enable_if::value, polynomial&>::type operator /=(const U& value) + BOOST_MATH_GPU_ENABLED typename std::enable_if::value, polynomial&>::type operator /=(const U& value) { division(value); normalize(); @@ -476,7 +476,7 @@ class polynomial } template - typename std::enable_if::value, polynomial&>::type operator %=(const U& /*value*/) + BOOST_MATH_GPU_ENABLED typename std::enable_if::value, polynomial&>::type operator %=(const U& /*value*/) { // We can always divide by a scalar, so there is no remainder: this->set_zero(); @@ -484,7 +484,7 @@ class polynomial } template - polynomial& operator +=(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& operator +=(const polynomial& value) { addition(value); normalize(); @@ -492,7 +492,7 @@ class polynomial } template - polynomial& operator -=(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& operator -=(const polynomial& value) { subtraction(value); normalize(); @@ -500,7 +500,7 @@ class polynomial } template - void multiply(const polynomial& a, const polynomial& b) { + BOOST_MATH_GPU_ENABLED void multiply(const polynomial& a, const polynomial& b) { if (!a || !b) { this->set_zero(); @@ -514,28 +514,28 @@ class polynomial } template - polynomial& operator *=(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& operator *=(const polynomial& value) { this->multiply(*this, value); return *this; } template - polynomial& operator /=(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& operator /=(const polynomial& value) { *this = quotient_remainder(*this, value).first; 
return *this; } template - polynomial& operator %=(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& operator %=(const polynomial& value) { *this = quotient_remainder(*this, value).second; return *this; } template - polynomial& operator >>=(U const &n) + BOOST_MATH_GPU_ENABLED polynomial& operator >>=(U const &n) { BOOST_MATH_ASSERT(n <= m_data.size()); m_data.erase(m_data.begin(), m_data.begin() + n); @@ -543,7 +543,7 @@ class polynomial } template - polynomial& operator <<=(U const &n) + BOOST_MATH_GPU_ENABLED polynomial& operator <<=(U const &n) { m_data.insert(m_data.begin(), n, static_cast(0)); normalize(); @@ -551,33 +551,33 @@ class polynomial } // Convenient and efficient query for zero. - bool is_zero() const + BOOST_MATH_GPU_ENABLED bool is_zero() const { return m_data.empty(); } // Conversion to bool. - inline explicit operator bool() const + BOOST_MATH_GPU_ENABLED inline explicit operator bool() const { return !m_data.empty(); } // Fast way to set a polynomial to zero. - void set_zero() + BOOST_MATH_GPU_ENABLED void set_zero() { m_data.clear(); } /** Remove zero coefficients 'from the top', that is for which there are no * non-zero coefficients of higher degree. */ - void normalize() + BOOST_MATH_GPU_ENABLED void normalize() { m_data.erase(std::find_if(m_data.rbegin(), m_data.rend(), [](const T& x)->bool { return x != T(0); }).base(), m_data.end()); } private: template - polynomial& addition(const U& value, R op) + BOOST_MATH_GPU_ENABLED polynomial& addition(const U& value, R op) { if(m_data.size() == 0) m_data.resize(1, 0); @@ -586,19 +586,19 @@ class polynomial } template - polynomial& addition(const U& value) + BOOST_MATH_GPU_ENABLED polynomial& addition(const U& value) { return addition(value, detail::plus()); } template - polynomial& subtraction(const U& value) + BOOST_MATH_GPU_ENABLED polynomial& subtraction(const U& value) { return addition(value, detail::minus()); } template - polynomial& addition(const polynomial& value, R op) + BOOST_MATH_GPU_ENABLED polynomial& addition(const polynomial& value, R op) { if (m_data.size() < value.size()) m_data.resize(value.size(), 0); @@ -608,26 +608,26 @@ class polynomial } template - polynomial& addition(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& addition(const polynomial& value) { return addition(value, detail::plus()); } template - polynomial& subtraction(const polynomial& value) + BOOST_MATH_GPU_ENABLED polynomial& subtraction(const polynomial& value) { return addition(value, detail::minus()); } template - polynomial& multiplication(const U& value) + BOOST_MATH_GPU_ENABLED polynomial& multiplication(const U& value) { std::transform(m_data.begin(), m_data.end(), m_data.begin(), [&](const T& x)->T { return x * value; }); return *this; } template - polynomial& division(const U& value) + BOOST_MATH_GPU_ENABLED polynomial& division(const U& value) { std::transform(m_data.begin(), m_data.end(), m_data.begin(), [&](const T& x)->T { return x / value; }); return *this; @@ -638,7 +638,7 @@ class polynomial template -inline polynomial operator + (const polynomial& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator + (const polynomial& a, const polynomial& b) { polynomial result(a); result += b; @@ -646,26 +646,26 @@ inline polynomial operator + (const polynomial& a, const polynomial& b) } template -inline polynomial operator + (polynomial&& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator + (polynomial&& a, const polynomial& b) { a += b; return std::move(a); } 
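The value-returning operators here forward to the compound assignments and to quotient_remainder above. A minimal host-side sketch of how they compose (illustrative only, not part of the patch; it assumes just <boost/math/tools/polynomial.hpp>, with coefficients stored lowest order first):

// Illustrative sketch, not part of this patch.
#include <boost/math/tools/polynomial.hpp>
#include <iostream>

int main()
{
   using boost::math::tools::polynomial;

   polynomial<double> u{2.0, 3.0, 1.0};   // 2 + 3x + x^2 == (1 + x)(2 + x)
   polynomial<double> v{1.0, 1.0};        // 1 + x

   polynomial<double> q = u / v;          // quotient:  2 + x
   polynomial<double> r = u % v;          // remainder: the zero polynomial here

   // operator/ and operator% both forward to quotient_remainder, so
   // u == q * v + r holds for any nonzero divisor v.
   std::cout << q << ' ' << r << '\n';
}

With the BOOST_MATH_GPU_ENABLED annotations applied, the same operations are intended to compile for device code as well; the sketch above exercises only the host path.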
template -inline polynomial operator + (const polynomial& a, polynomial&& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator + (const polynomial& a, polynomial&& b) { b += a; return b; } template -inline polynomial operator + (polynomial&& a, polynomial&& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator + (polynomial&& a, polynomial&& b) { a += b; return a; } template -inline polynomial operator - (const polynomial& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator - (const polynomial& a, const polynomial& b) { polynomial result(a); result -= b; @@ -673,26 +673,26 @@ inline polynomial operator - (const polynomial& a, const polynomial& b) } template -inline polynomial operator - (polynomial&& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator - (polynomial&& a, const polynomial& b) { a -= b; return a; } template -inline polynomial operator - (const polynomial& a, polynomial&& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator - (const polynomial& a, polynomial&& b) { b -= a; return -b; } template -inline polynomial operator - (polynomial&& a, polynomial&& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator - (polynomial&& a, polynomial&& b) { a -= b; return a; } template -inline polynomial operator * (const polynomial& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator * (const polynomial& a, const polynomial& b) { polynomial result; result.multiply(a, b); @@ -700,94 +700,94 @@ inline polynomial operator * (const polynomial& a, const polynomial& b) } template -inline polynomial operator / (const polynomial& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator / (const polynomial& a, const polynomial& b) { return quotient_remainder(a, b).first; } template -inline polynomial operator % (const polynomial& a, const polynomial& b) +BOOST_MATH_GPU_ENABLED inline polynomial operator % (const polynomial& a, const polynomial& b) { return quotient_remainder(a, b).second; } template -inline typename std::enable_if::value, polynomial >::type operator + (polynomial a, const U& b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator + (polynomial a, const U& b) { a += b; return a; } template -inline typename std::enable_if::value, polynomial >::type operator - (polynomial a, const U& b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator - (polynomial a, const U& b) { a -= b; return a; } template -inline typename std::enable_if::value, polynomial >::type operator * (polynomial a, const U& b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator * (polynomial a, const U& b) { a *= b; return a; } template -inline typename std::enable_if::value, polynomial >::type operator / (polynomial a, const U& b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator / (polynomial a, const U& b) { a /= b; return a; } template -inline typename std::enable_if::value, polynomial >::type operator % (const polynomial&, const U&) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator % (const polynomial&, const U&) { // Since we can always divide by a scalar, result is always an empty polynomial: return polynomial(); } template -inline typename std::enable_if::value, polynomial >::type operator + (const U& a, polynomial b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator + (const U& a, 
polynomial b) { b += a; return b; } template -inline typename std::enable_if::value, polynomial >::type operator - (const U& a, polynomial b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator - (const U& a, polynomial b) { b -= a; return -b; } template -inline typename std::enable_if::value, polynomial >::type operator * (const U& a, polynomial b) +BOOST_MATH_GPU_ENABLED inline typename std::enable_if::value, polynomial >::type operator * (const U& a, polynomial b) { b *= a; return b; } template -bool operator == (const polynomial &a, const polynomial &b) +BOOST_MATH_GPU_ENABLED bool operator == (const polynomial &a, const polynomial &b) { return a.data() == b.data(); } template -bool operator != (const polynomial &a, const polynomial &b) +BOOST_MATH_GPU_ENABLED bool operator != (const polynomial &a, const polynomial &b) { return a.data() != b.data(); } template -polynomial operator >> (polynomial a, const U& b) +BOOST_MATH_GPU_ENABLED polynomial operator >> (polynomial a, const U& b) { a >>= b; return a; } template -polynomial operator << (polynomial a, const U& b) +BOOST_MATH_GPU_ENABLED polynomial operator << (polynomial a, const U& b) { a <<= b; return a; @@ -795,26 +795,26 @@ polynomial operator << (polynomial a, const U& b) // Unary minus (negate). template -polynomial operator - (polynomial a) +BOOST_MATH_GPU_ENABLED polynomial operator - (polynomial a) { std::transform(a.data().begin(), a.data().end(), a.data().begin(), detail::negate()); return a; } template -bool odd(polynomial const &a) +BOOST_MATH_GPU_ENABLED bool odd(polynomial const &a) { return a.size() > 0 && a[0] != static_cast(0); } template -bool even(polynomial const &a) +BOOST_MATH_GPU_ENABLED bool even(polynomial const &a) { return !odd(a); } template -polynomial pow(polynomial base, int exp) +BOOST_MATH_GPU_ENABLED polynomial pow(polynomial base, int exp) { if (exp < 0) return policies::raise_domain_error( @@ -838,7 +838,7 @@ polynomial pow(polynomial base, int exp) } template -inline std::basic_ostream& operator << (std::basic_ostream& os, const polynomial& poly) +BOOST_MATH_GPU_ENABLED inline std::basic_ostream& operator << (std::basic_ostream& os, const polynomial& poly) { os << "{ "; for(unsigned i = 0; i < poly.size(); ++i) diff --git a/include/boost/math/tools/precision.hpp b/include/boost/math/tools/precision.hpp index d1643e01d3..662657732c 100644 --- a/include/boost/math/tools/precision.hpp +++ b/include/boost/math/tools/precision.hpp @@ -10,14 +10,20 @@ #pragma once #endif +#include #include +#include +#include #include + +#ifndef BOOST_MATH_HAS_NVRTC #include #include #include #include #include #include // LDBL_MANT_DIG +#endif namespace boost{ namespace math { @@ -36,30 +42,30 @@ namespace tools // See Conceptual Requirements for Real Number Types. template -inline constexpr int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) noexcept +BOOST_MATH_GPU_ENABLED inline constexpr int digits(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) noexcept { - static_assert( ::std::numeric_limits::is_specialized, "Type T must be specialized"); - static_assert( ::std::numeric_limits::radix == 2 || ::std::numeric_limits::radix == 10, "Type T must have a radix of 2 or 10"); + static_assert( ::boost::math::numeric_limits::is_specialized, "Type T must be specialized"); + static_assert( ::boost::math::numeric_limits::radix == 2 || ::boost::math::numeric_limits::radix == 10, "Type T must have a radix of 2 or 10"); - return std::numeric_limits::radix == 2 - ? 
std::numeric_limits::digits - : ((std::numeric_limits::digits + 1) * 1000L) / 301L; + return boost::math::numeric_limits::radix == 2 + ? boost::math::numeric_limits::digits + : ((boost::math::numeric_limits::digits + 1) * 1000L) / 301L; } template -inline constexpr T max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { - static_assert( ::std::numeric_limits::is_specialized, "Type T must be specialized"); - return (std::numeric_limits::max)(); + static_assert( ::boost::math::numeric_limits::is_specialized, "Type T must be specialized"); + return (boost::math::numeric_limits::max)(); } // Also used as a finite 'infinite' value for - and +infinity, for example: // -max_value = -1.79769e+308, max_value = 1.79769e+308. template -inline constexpr T min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { - static_assert( ::std::numeric_limits::is_specialized, "Type T must be specialized"); + static_assert( ::boost::math::numeric_limits::is_specialized, "Type T must be specialized"); - return (std::numeric_limits::min)(); + return (boost::math::numeric_limits::min)(); } namespace detail{ @@ -72,13 +78,13 @@ namespace detail{ // For type float first: // template -inline constexpr T log_max_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED constexpr T log_max_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { return 88.0f; } template -inline constexpr T log_min_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED constexpr T log_min_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { return -87.0f; } @@ -86,13 +92,13 @@ inline constexpr T log_min_value(const std::integral_constant& BOOST_M // Now double: // template -inline constexpr T log_max_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED constexpr T log_max_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { return 709.0; } template -inline constexpr T log_min_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED constexpr T log_min_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { return -708.0; } @@ -100,19 +106,19 @@ inline constexpr T log_min_value(const std::integral_constant& BOOST_ // 80 and 128-bit long doubles: // template -inline constexpr T log_max_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T log_max_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { return 11356.0L; } template 
-inline constexpr T log_min_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T log_min_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { return -11355.0L; } template -inline T log_max_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) +BOOST_MATH_GPU_ENABLED inline T log_max_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) { BOOST_MATH_STD_USING #ifdef __SUNPRO_CC @@ -125,7 +131,7 @@ inline T log_max_value(const std::integral_constant& BOOST_MATH_APPEND_E } template -inline T log_min_value(const std::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) +BOOST_MATH_GPU_ENABLED inline T log_min_value(const boost::math::integral_constant& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) { BOOST_MATH_STD_USING #ifdef __SUNPRO_CC @@ -138,14 +144,14 @@ inline T log_min_value(const std::integral_constant& BOOST_MATH_APPEND_E } template -inline constexpr T epsilon(const std::true_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED constexpr T epsilon(const boost::math::true_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(boost::math::is_floating_point::value) { - return std::numeric_limits::epsilon(); + return boost::math::numeric_limits::epsilon(); } #if defined(__GNUC__) && ((LDBL_MANT_DIG == 106) || (__LDBL_MANT_DIG__ == 106)) template <> -inline constexpr long double epsilon(const std::true_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(long double)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr long double epsilon(const boost::math::true_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(long double)) noexcept(boost::math::is_floating_point::value) { // numeric_limits on Darwin (and elsewhere) tells lies here: // the issue is that long double on a few platforms is @@ -164,7 +170,7 @@ inline constexpr long double epsilon(const std::true_type& BOOST_MA #endif template -inline T epsilon(const std::false_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) +BOOST_MATH_GPU_ENABLED inline T epsilon(const boost::math::false_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE(T)) { // Note: don't cache result as precision may vary at runtime: BOOST_MATH_STD_USING // for ADL of std names @@ -174,23 +180,23 @@ inline T epsilon(const std::false_type& BOOST_MATH_APPEND_EXPLICIT_TEMPLATE_TYPE template struct log_limit_traits { - typedef typename std::conditional< - (std::numeric_limits::radix == 2) && - (std::numeric_limits::max_exponent == 128 - || std::numeric_limits::max_exponent == 1024 - || std::numeric_limits::max_exponent == 16384), - std::integral_constant::max_exponent > INT_MAX ? INT_MAX : static_cast(std::numeric_limits::max_exponent))>, - std::integral_constant + typedef typename boost::math::conditional< + (boost::math::numeric_limits::radix == 2) && + (boost::math::numeric_limits::max_exponent == 128 + || boost::math::numeric_limits::max_exponent == 1024 + || boost::math::numeric_limits::max_exponent == 16384), + boost::math::integral_constant::max_exponent > (boost::math::numeric_limits::max)() ? 
(boost::math::numeric_limits::max)() : static_cast(boost::math::numeric_limits::max_exponent))>, + boost::math::integral_constant >::type tag_type; static constexpr bool value = (tag_type::value != 0); - static_assert(::std::numeric_limits::is_specialized || !value, "Type T must be specialized or equal to 0"); + static_assert(::boost::math::numeric_limits::is_specialized || !value, "Type T must be specialized or equal to 0"); }; template struct log_limit_noexcept_traits_imp : public log_limit_traits {}; -template struct log_limit_noexcept_traits_imp : public std::integral_constant {}; +template struct log_limit_noexcept_traits_imp : public boost::math::integral_constant {}; template -struct log_limit_noexcept_traits : public log_limit_noexcept_traits_imp::value> {}; +struct log_limit_noexcept_traits : public log_limit_noexcept_traits_imp::value> {}; } // namespace detail @@ -200,28 +206,36 @@ struct log_limit_noexcept_traits : public log_limit_noexcept_traits_imp -inline constexpr T log_max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(detail::log_limit_noexcept_traits::value) +BOOST_MATH_GPU_ENABLED inline constexpr T log_max_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(detail::log_limit_noexcept_traits::value) { -#ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS - return detail::log_max_value(typename detail::log_limit_traits::tag_type()); +#ifndef BOOST_MATH_HAS_NVRTC + #ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS + return detail::log_max_value(typename detail::log_limit_traits::tag_type()); + #else + BOOST_MATH_ASSERT(::boost::math::numeric_limits::is_specialized); + BOOST_MATH_STD_USING + static const T val = log((boost::math::numeric_limits::max)()); + return val; + #endif #else - BOOST_MATH_ASSERT(::std::numeric_limits::is_specialized); - BOOST_MATH_STD_USING - static const T val = log((std::numeric_limits::max)()); - return val; + return log((boost::math::numeric_limits::max)()); #endif } template -inline constexpr T log_min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(detail::log_limit_noexcept_traits::value) +BOOST_MATH_GPU_ENABLED inline constexpr T log_min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept(detail::log_limit_noexcept_traits::value) { -#ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS - return detail::log_min_value(typename detail::log_limit_traits::tag_type()); +#ifndef BOOST_MATH_HAS_NVRTC + #ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS + return detail::log_min_value(typename detail::log_limit_traits::tag_type()); + #else + BOOST_MATH_ASSERT(::boost::math::numeric_limits::is_specialized); + BOOST_MATH_STD_USING + static const T val = log((boost::math::numeric_limits::min)()); + return val; + #endif #else - BOOST_MATH_ASSERT(::std::numeric_limits::is_specialized); - BOOST_MATH_STD_USING - static const T val = log((std::numeric_limits::min)()); - return val; + return log((boost::math::numeric_limits::min)()); #endif } @@ -230,84 +244,89 @@ inline constexpr T log_min_value(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE(T)) noexcept( #endif template -inline constexpr T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED constexpr T epsilon(BOOST_MATH_EXPLICIT_TEMPLATE_TYPE_SPEC(T)) noexcept(boost::math::is_floating_point::value) { -#ifndef BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS - return detail::epsilon(std::integral_constant::is_specialized>()); + // NVRTC does not like this dispatching method so we just skip to where we want to go +#ifndef BOOST_MATH_HAS_NVRTC + #ifndef 
BOOST_NO_LIMITS_COMPILE_TIME_CONSTANTS + return detail::epsilon(boost::math::integral_constant::is_specialized>()); + #else + return ::boost::math::numeric_limits::is_specialized ? + detail::epsilon(boost::math::true_type()) : + detail::epsilon(boost::math::false_type()); + #endif #else - return ::std::numeric_limits::is_specialized ? - detail::epsilon(std::true_type()) : - detail::epsilon(std::false_type()); + return boost::math::numeric_limits::epsilon(); #endif } namespace detail{ template -inline constexpr T root_epsilon_imp(const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T root_epsilon_imp(const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.00034526698300124390839884978618400831996329879769945L); } template -inline constexpr T root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.1490116119384765625e-7L); } template -inline constexpr T root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.32927225399135962333569506281281311031656150598474e-9L); } template -inline constexpr T root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.1387778780781445675529539585113525390625e-16L); } template -inline T root_epsilon_imp(const T*, const Tag&) +BOOST_MATH_GPU_ENABLED inline T root_epsilon_imp(const T*, const Tag&) { BOOST_MATH_STD_USING - static const T r_eps = sqrt(tools::epsilon()); + BOOST_MATH_STATIC_LOCAL_VARIABLE const T r_eps = sqrt(tools::epsilon()); return r_eps; } template -inline T root_epsilon_imp(const T*, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline T root_epsilon_imp(const T*, const boost::math::integral_constant&) { BOOST_MATH_STD_USING return sqrt(tools::epsilon()); } template -inline constexpr T cbrt_epsilon_imp(const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T cbrt_epsilon_imp(const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.0049215666011518482998719164346805794944150447839903L); } template -inline constexpr T cbrt_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T cbrt_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(6.05545445239333906078989272793696693569753008995e-6L); } template -inline constexpr T cbrt_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T cbrt_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(4.76837158203125e-7L); } template -inline constexpr T cbrt_epsilon_imp(const T*, const std::integral_constant&) 
noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T cbrt_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(5.7749313854154005630396773604745549542403508090496e-12L); } template -inline T cbrt_epsilon_imp(const T*, const Tag&) +BOOST_MATH_GPU_ENABLED inline T cbrt_epsilon_imp(const T*, const Tag&) { BOOST_MATH_STD_USING; static const T cbrt_eps = pow(tools::epsilon(), T(1) / 3); @@ -315,38 +334,38 @@ inline T cbrt_epsilon_imp(const T*, const Tag&) } template -inline T cbrt_epsilon_imp(const T*, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline T cbrt_epsilon_imp(const T*, const boost::math::integral_constant&) { BOOST_MATH_STD_USING; return pow(tools::epsilon(), T(1) / 3); } template -inline constexpr T forth_root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T forth_root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.018581361171917516667460937040007436176452688944747L); } template -inline constexpr T forth_root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T forth_root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.0001220703125L); } template -inline constexpr T forth_root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T forth_root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.18145860519450699870567321328132261891067079047605e-4L); } template -inline constexpr T forth_root_epsilon_imp(const T*, const std::integral_constant&) noexcept(std::is_floating_point::value) +BOOST_MATH_GPU_ENABLED inline constexpr T forth_root_epsilon_imp(const T*, const boost::math::integral_constant&) noexcept(boost::math::is_floating_point::value) { return static_cast(0.37252902984619140625e-8L); } template -inline T forth_root_epsilon_imp(const T*, const Tag&) +BOOST_MATH_GPU_ENABLED inline T forth_root_epsilon_imp(const T*, const Tag&) { BOOST_MATH_STD_USING static const T r_eps = sqrt(sqrt(tools::epsilon())); @@ -354,7 +373,7 @@ inline T forth_root_epsilon_imp(const T*, const Tag&) } template -inline T forth_root_epsilon_imp(const T*, const std::integral_constant&) +BOOST_MATH_GPU_ENABLED inline T forth_root_epsilon_imp(const T*, const boost::math::integral_constant&) { BOOST_MATH_STD_USING return sqrt(sqrt(tools::epsilon())); @@ -363,26 +382,26 @@ inline T forth_root_epsilon_imp(const T*, const std::integral_constant&) template struct root_epsilon_traits { - typedef std::integral_constant::radix == 2) && (::std::numeric_limits::digits != INT_MAX) ? std::numeric_limits::digits : 0> tag_type; + typedef boost::math::integral_constant::radix == 2) && (::boost::math::numeric_limits::digits != (boost::math::numeric_limits::max)()) ? 
boost::math::numeric_limits::digits : 0> tag_type; static constexpr bool has_noexcept = (tag_type::value == 113) || (tag_type::value == 64) || (tag_type::value == 53) || (tag_type::value == 24); }; } template -inline constexpr T root_epsilon() noexcept(std::is_floating_point::value && detail::root_epsilon_traits::has_noexcept) +BOOST_MATH_GPU_ENABLED inline constexpr T root_epsilon() noexcept(boost::math::is_floating_point::value && detail::root_epsilon_traits::has_noexcept) { return detail::root_epsilon_imp(static_cast(nullptr), typename detail::root_epsilon_traits::tag_type()); } template -inline constexpr T cbrt_epsilon() noexcept(std::is_floating_point::value && detail::root_epsilon_traits::has_noexcept) +BOOST_MATH_GPU_ENABLED inline constexpr T cbrt_epsilon() noexcept(boost::math::is_floating_point::value && detail::root_epsilon_traits::has_noexcept) { return detail::cbrt_epsilon_imp(static_cast(nullptr), typename detail::root_epsilon_traits::tag_type()); } template -inline constexpr T forth_root_epsilon() noexcept(std::is_floating_point::value && detail::root_epsilon_traits::has_noexcept) +BOOST_MATH_GPU_ENABLED inline constexpr T forth_root_epsilon() noexcept(boost::math::is_floating_point::value && detail::root_epsilon_traits::has_noexcept) { return detail::forth_root_epsilon_imp(static_cast(nullptr), typename detail::root_epsilon_traits::tag_type()); } diff --git a/include/boost/math/tools/promotion.hpp b/include/boost/math/tools/promotion.hpp index c117e9575d..a65f3703f4 100644 --- a/include/boost/math/tools/promotion.hpp +++ b/include/boost/math/tools/promotion.hpp @@ -3,6 +3,7 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2006. // Copyright Matt Borland 2023. +// Copyright Ryan Elandt 2023. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -24,15 +25,7 @@ #endif #include -#include - -#if defined __has_include -# if __cplusplus > 202002L || (defined(_MSVC_LANG) && _MSVC_LANG > 202002L) -# if __has_include () -# include -# endif -# endif -#endif +#include namespace boost { @@ -40,272 +33,103 @@ namespace boost { namespace tools { + ///// This promotion system works as follows: + // + // Rule (one argument promotion rule): + // - Promotes `T` to `double` if `T` is an integer type as identified by + // `std::is_integral`, otherwise is `T` + // + // Rule (two or more argument promotion rule): + // - 1. Calculates type using applying Rule. + // - 2. Calculates type using applying Rule + // - If the type calculated in 1 and 2 are both floating point types, as + // identified by `std::is_floating_point`, then return the type + // determined by `std::common_type`. Otherwise return the type using + // an asymmetric convertibility rule. + // + ///// Discussion: + // // If either T1 or T2 is an integer type, // pretend it was a double (for the purposes of further analysis). // Then pick the wider of the two floating-point types // as the actual signature to forward to. // For example: - // foo(int, short) -> double foo(double, double); - // foo(int, float) -> double foo(double, double); - // Note: NOT float foo(float, float) - // foo(int, double) -> foo(double, double); - // foo(double, float) -> double foo(double, double); - // foo(double, float) -> double foo(double, double); - // foo(any-int-or-float-type, long double) -> foo(long double, long double); - // but ONLY float foo(float, float) is unchanged. - // So the only way to get an entirely float version is to call foo(1.F, 2.F), - // But since most (all?) 
the math functions convert to double internally, - // probably there would not be the hoped-for gain by using float here. - + // foo(int, short) -> double foo(double, double); // ***NOT*** float foo(float, float) + // foo(int, float) -> double foo(double, double); // ***NOT*** float foo(float, float) + // foo(int, double) -> foo(double, double); + // foo(double, float) -> double foo(double, double); + // foo(double, float) -> double foo(double, double); + // foo(any-int-or-float-type, long double) -> foo(long double, long double); + // ONLY float foo(float, float) is unchanged, so the only way to get an + // entirely float version is to call foo(1.F, 2.F). But since most (all?) the + // math functions convert to double internally, probably there would not be the + // hoped-for gain by using float here. + // // This follows the C-compatible conversion rules of pow, etc // where pow(int, float) is converted to pow(double, double). + + // Promotes a single argument to double if it is an integer type template - struct promote_arg - { // If T is integral type, then promote to double. - using type = typename std::conditional::value, double, T>::type; + struct promote_arg { + using type = typename boost::math::conditional::value, double, T>::type; }; - // These full specialisations reduce std::conditional usage and speed up - // compilation: - template <> struct promote_arg { using type = float; }; - template <> struct promote_arg{ using type = double; }; - template <> struct promote_arg { using type = long double; }; - template <> struct promote_arg { using type = double; }; - #ifdef __STDCPP_FLOAT16_T__ - template <> struct promote_arg { using type = std::float16_t; }; - #endif - #ifdef __STDCPP_FLOAT32_T__ - template <> struct promote_arg { using type = std::float32_t; }; - #endif - #ifdef __STDCPP_FLOAT64_T__ - template <> struct promote_arg { using type = std::float64_t; }; - #endif - #ifdef __STDCPP_FLOAT128_T__ - template <> struct promote_arg { using type = std::float128_t; }; - #endif - - template - using promote_arg_t = typename promote_arg::type; + // Promotes two arguments, neither of which is an integer type using an asymmetric + // convertibility rule. + template ::value && boost::math::is_floating_point::value)> + struct pa2_integral_already_removed { + using type = typename boost::math::conditional< + !boost::math::is_floating_point::value && boost::math::is_convertible::value, + T2, T1>::type; + }; + // For two floating point types, promotes using `std::common_type` functionality template - struct promote_args_2 - { // Promote, if necessary, & pick the wider of the two floating-point types. - // for both parameter types, if integral promote to double. - using T1P = typename promote_arg::type; // T1 perhaps promoted. - using T2P = typename promote_arg::type; // T2 perhaps promoted. - using intermediate_type = typename std::conditional< - std::is_floating_point::value && std::is_floating_point::value, // both T1P and T2P are floating-point? -#ifdef __STDCPP_FLOAT128_T__ - typename std::conditional::value || std::is_same::value, // either long double? - std::float128_t, -#endif -#ifdef BOOST_MATH_USE_FLOAT128 - typename std::conditional::value || std::is_same<__float128, T2P>::value, // either long double? - __float128, -#endif - typename std::conditional::value || std::is_same::value, // either long double? - long double, // then result type is long double. -#ifdef __STDCPP_FLOAT64_T__ - typename std::conditional::value || std::is_same::value, // either float64? 
- std::float64_t, // then result type is float64_t. -#endif - typename std::conditional::value || std::is_same::value, // either double? - double, // result type is double. -#ifdef __STDCPP_FLOAT32_T__ - typename std::conditional::value || std::is_same::value, // either float32? - std::float32_t, // then result type is float32_t. -#endif - float // else result type is float. - >::type -#ifdef BOOST_MATH_USE_FLOAT128 - >::type -#endif -#ifdef __STDCPP_FLOAT128_T__ - >::type -#endif -#ifdef __STDCPP_FLOAT64_T__ - >::type -#endif -#ifdef __STDCPP_FLOAT32_T__ - >::type -#endif - >::type, - // else one or the other is a user-defined type: - typename std::conditional::value && std::is_convertible::value, T2P, T1P>::type>::type; - -#ifdef __STDCPP_FLOAT64_T__ - // If long doubles are doubles then we should prefer to use std::float64_t when available - using type = std::conditional_t<(sizeof(double) == sizeof(long double) && std::is_same::value), std::float64_t, intermediate_type>; -#else - using type = intermediate_type; -#endif - }; // promote_arg2 - // These full specialisations reduce std::conditional usage and speed up - // compilation: - template <> struct promote_args_2 { using type = float; }; - template <> struct promote_args_2{ using type = double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - - #ifdef __STDCPP_FLOAT128_T__ - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - - #ifdef __STDCPP_FLOAT16_T__ - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - #endif - - #ifdef __STDCPP_FLOAT32_T__ - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - #endif - - #ifdef __STDCPP_FLOAT64_T__ - template <> struct promote_args_2 { using type = std::float128_t; }; - template <> struct promote_args_2 { using type = std::float128_t; }; - #endif - - template <> struct promote_args_2 { using type = std::float128_t; }; - #endif - - #ifdef __STDCPP_FLOAT64_T__ - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using 
type = std::float64_t; }; - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - - #ifdef __STDCPP_FLOAT16_T__ - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using type = std::float64_t; }; - #endif - - #ifdef __STDCPP_FLOAT32_T__ - template <> struct promote_args_2 { using type = std::float64_t; }; - template <> struct promote_args_2 { using type = std::float64_t; }; - #endif - - template <> struct promote_args_2 { using type = std::float64_t; }; - #endif - - #ifdef __STDCPP_FLOAT32_T__ - template <> struct promote_args_2 { using type = std::float32_t; }; - template <> struct promote_args_2 { using type = std::float32_t; }; - template <> struct promote_args_2 { using type = std::float32_t; }; - template <> struct promote_args_2 { using type = std::float32_t; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - - #ifdef __STDCPP_FLOAT16_T__ - template <> struct promote_args_2 { using type = std::float32_t; }; - template <> struct promote_args_2 { using type = std::float32_t; }; - #endif - - template <> struct promote_args_2 { using type = std::float32_t; }; - #endif + struct pa2_integral_already_removed { + using type = boost::math::common_type_t; + }; - #ifdef __STDCPP_FLOAT16_T__ - template <> struct promote_args_2 { using type = std::float16_t; }; - template <> struct promote_args_2 { using type = std::float16_t; }; - template <> struct promote_args_2 { using type = float; }; - template <> struct promote_args_2 { using type = float; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = long double; }; - template <> struct promote_args_2 { using type = std::float16_t; }; - #endif + // Template definition for promote_args_permissive + template + struct promote_args_permissive; + // Specialization for one argument + template + struct promote_args_permissive { + using type = typename promote_arg::type>::type; + }; + // Specialization for two or more arguments + template + struct promote_args_permissive { + using type = typename pa2_integral_already_removed< + typename promote_args_permissive::type, + typename promote_args_permissive::type + >::type; + }; - template - using promote_args_2_t = typename promote_args_2::type; + template + using promote_args_permissive_t = typename promote_args_permissive::type; - template - struct promote_args - { - using type = typename promote_args_2< - typename std::remove_cv::type, - typename promote_args_2< - typename std::remove_cv::type, - typename promote_args_2< - typename std::remove_cv::type, - typename promote_args_2< - typename std::remove_cv::type, - typename promote_args_2< - typename std::remove_cv::type, typename std::remove_cv::type - >::type - >::type - >::type - >::type - >::type; + // Same as `promote_args_permissive` but with a 
static assertion that the promoted type
+   // is not `long double` if `BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS` is defined
+   template <typename... Args>
+   struct promote_args {
+      using type = typename promote_args_permissive<Args...>::type;
 #if defined(BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS)
       //
       // Guard against use of long double if it's not supported:
       //
-      static_assert((0 == std::is_same<type, long double>::value), "Sorry, but this platform does not have sufficient long double support for the special functions to be reliably implemented.");
+      static_assert((0 == boost::math::is_same<type, long double>::value), "Sorry, but this platform does not have sufficient long double support for the special functions to be reliably implemented.");
 #endif
    };
 
-   template <class T1, class T2=float, class T3=float, class T4=float, class T5=float, class T6=float>
-   using promote_args_t = typename promote_args<T1, T2, T3, T4, T5, T6>::type;
-
-   //
-   // This struct is the same as above, but has no static assert on long double usage,
-   // it should be used only on functions that can be implemented for long double
-   // even when std lib support is missing or broken for that type.
-   //
-   template <class T1, class T2=float, class T3=float, class T4=float, class T5=float, class T6=float>
-   struct promote_args_permissive
-   {
-      using type = typename promote_args_2<
-         typename std::remove_cv<T1>::type,
-         typename promote_args_2<
-            typename std::remove_cv<T2>::type,
-            typename promote_args_2<
-               typename std::remove_cv<T3>::type,
-               typename promote_args_2<
-                  typename std::remove_cv<T4>::type,
-                  typename promote_args_2<
-                     typename std::remove_cv<T5>::type, typename std::remove_cv<T6>::type
-                  >::type
-               >::type
-            >::type
-         >::type
-      >::type;
-   };
-
-   template <class T1, class T2=float, class T3=float, class T4=float, class T5=float, class T6=float>
-   using promote_args_permissive_t = typename promote_args_permissive<T1, T2, T3, T4, T5, T6>::type;
+   template <typename... Args>
+   using promote_args_t = typename promote_args<Args...>::type;
 
 } // namespace tools
 } // namespace math
 } // namespace boost
 
 #endif // BOOST_MATH_PROMOTION_HPP
-
diff --git a/include/boost/math/tools/rational.hpp b/include/boost/math/tools/rational.hpp
index 69b7251539..a535abcdc5 100644
--- a/include/boost/math/tools/rational.hpp
+++ b/include/boost/math/tools/rational.hpp
@@ -10,9 +10,14 @@
 #pragma once
 #endif
 
-#include <array>
 #include <boost/math/tools/config.hpp>
 #include <boost/math/tools/assert.hpp>
+#include <boost/math/tools/type_traits.hpp>
+#include <boost/math/tools/cstdint.hpp>
+
+#ifndef BOOST_MATH_HAS_NVRTC
+#include <array>
+#endif
 
 #if BOOST_MATH_POLY_METHOD == 1
 #  define BOOST_HEADER()
@@ -168,12 +173,12 @@ namespace boost{ namespace math{ namespace tools{
 // Forward declaration to keep two phase lookup happy:
 //
 template <class T, class U>
-U evaluate_polynomial(const T* poly, U const& z, std::size_t count) BOOST_MATH_NOEXCEPT(U);
+BOOST_MATH_GPU_ENABLED U evaluate_polynomial(const T* poly, U const& z, boost::math::size_t count) BOOST_MATH_NOEXCEPT(U);
 
 namespace detail{
 
 template <class T, class V, class Tag>
-inline V evaluate_polynomial_c_imp(const T* a, const V& val, const Tag*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& val, const Tag*) BOOST_MATH_NOEXCEPT(V)
 {
    return evaluate_polynomial(a, val, Tag::value);
 }
@@ -186,7 +191,7 @@ inline V evaluate_polynomial_c_imp(const T* a, const V& val, const Tag*) BOOST_M
 // the loop expanded versions above:
 //
 template <class T, class U>
-inline U evaluate_polynomial(const T* poly, U const& z, std::size_t count) BOOST_MATH_NOEXCEPT(U)
+BOOST_MATH_GPU_ENABLED inline U evaluate_polynomial(const T* poly, U const& z, boost::math::size_t count) BOOST_MATH_NOEXCEPT(U)
 {
    BOOST_MATH_ASSERT(count > 0);
    U sum = static_cast<U>(poly[count - 1]);
@@ -201,69 +206,75 @@ inline U evaluate_polynomial(const T* poly, U const& z, std::size_t count) BOOST
 //
 // Compile time sized polynomials, just inline forwarders to the
 // implementations above:
 //
-template <std::size_t N, class T, class V>
-inline V evaluate_polynomial(const T(&a)[N], const V& val) BOOST_MATH_NOEXCEPT(V)
+template <boost::math::size_t N, class T, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const T(&a)[N], const V& val) BOOST_MATH_NOEXCEPT(V)
 {
-   typedef std::integral_constant<int, N> tag_type;
+   typedef boost::math::integral_constant<int, N> tag_type;
    return detail::evaluate_polynomial_c_imp(static_cast<const T*>(a), val, static_cast<tag_type const*>(nullptr));
 }
 
-template <std::size_t N, class T, class V>
-inline V evaluate_polynomial(const std::array<T, N>& a, const V& val) BOOST_MATH_NOEXCEPT(V)
+#ifndef BOOST_MATH_HAS_NVRTC
+template <std::size_t N, class T, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial(const std::array<T, N>& a, const V& val) BOOST_MATH_NOEXCEPT(V)
 {
-   typedef std::integral_constant<int, N> tag_type;
+   typedef boost::math::integral_constant<int, N> tag_type;
    return detail::evaluate_polynomial_c_imp(static_cast<const T*>(a.data()), val, static_cast<tag_type const*>(nullptr));
 }
+#endif
 //
 // Even polynomials are trivial: just square the argument!
 //
 template <class T, class U>
-inline U evaluate_even_polynomial(const T* poly, U z, std::size_t count) BOOST_MATH_NOEXCEPT(U)
+BOOST_MATH_GPU_ENABLED inline U evaluate_even_polynomial(const T* poly, U z, boost::math::size_t count) BOOST_MATH_NOEXCEPT(U)
 {
    return evaluate_polynomial(poly, U(z*z), count);
 }
 
-template <std::size_t N, class T, class V>
-inline V evaluate_even_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
+template <boost::math::size_t N, class T, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_even_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
 {
    return evaluate_polynomial(a, V(z*z));
 }
 
-template <std::size_t N, class T, class V>
-inline V evaluate_even_polynomial(const std::array<T, N>& a, const V& z) BOOST_MATH_NOEXCEPT(V)
+#ifndef BOOST_MATH_HAS_NVRTC
+template <std::size_t N, class T, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_even_polynomial(const std::array<T, N>& a, const V& z) BOOST_MATH_NOEXCEPT(V)
 {
    return evaluate_polynomial(a, V(z*z));
 }
+#endif
 //
 // Odd polynomials come next:
 //
 template <class T, class U>
-inline U evaluate_odd_polynomial(const T* poly, U z, std::size_t count) BOOST_MATH_NOEXCEPT(U)
+BOOST_MATH_GPU_ENABLED inline U evaluate_odd_polynomial(const T* poly, U z, boost::math::size_t count) BOOST_MATH_NOEXCEPT(U)
 {
    return poly[0] + z * evaluate_polynomial(poly+1, U(z*z), count-1);
 }
 
-template <std::size_t N, class T, class V>
-inline V evaluate_odd_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
+template <boost::math::size_t N, class T, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(const T(&a)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
 {
-   typedef std::integral_constant<int, N - 1> tag_type;
+   typedef boost::math::integral_constant<int, N - 1> tag_type;
    return a[0] + z * detail::evaluate_polynomial_c_imp(static_cast<const T*>(a) + 1, V(z*z), static_cast<tag_type const*>(nullptr));
 }
 
-template <std::size_t N, class T, class V>
-inline V evaluate_odd_polynomial(const std::array<T, N>& a, const V& z) BOOST_MATH_NOEXCEPT(V)
+#ifndef BOOST_MATH_HAS_NVRTC
+template <std::size_t N, class T, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_odd_polynomial(const std::array<T, N>& a, const V& z) BOOST_MATH_NOEXCEPT(V)
 {
-   typedef std::integral_constant<int, N - 1> tag_type;
+   typedef boost::math::integral_constant<int, N - 1> tag_type;
    return a[0] + z * detail::evaluate_polynomial_c_imp(static_cast<const T*>(a.data()) + 1, V(z*z), static_cast<tag_type const*>(nullptr));
 }
+#endif
 
 template <class T, class U, class V>
-V evaluate_rational(const T* num, const U* denom, const V& z_, std::size_t count) BOOST_MATH_NOEXCEPT(V);
+BOOST_MATH_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V& z_, boost::math::size_t count) BOOST_MATH_NOEXCEPT(V);
 
 namespace detail{
 
 template <class T, class U, class V, class Tag>
-inline V evaluate_rational_c_imp(const T* num, const U* denom, const V& z, const Tag*) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* num, const U* denom, const V& z, const Tag*) BOOST_MATH_NOEXCEPT(V)
 {
    return boost::math::tools::evaluate_rational(num, denom, z, Tag::value);
 }
@@ -278,7 +289,7 @@ inline V evaluate_rational_c_imp(const T* num, const U* denom, const V& z, const
 // in our Lanczos code for example.
 //
 template <class T, class U, class V>
-V evaluate_rational(const T* num, const U* denom, const V& z_, std::size_t count) BOOST_MATH_NOEXCEPT(V)
+BOOST_MATH_GPU_ENABLED V evaluate_rational(const T* num, const U* denom, const V& z_, boost::math::size_t count) BOOST_MATH_NOEXCEPT(V)
 {
    V z(z_);
    V s1, s2;
@@ -310,17 +321,19 @@ V evaluate_rational(const T* num, const U* denom, const V& z_, std::size_t count
    return s1 / s2;
 }
 
-template <std::size_t N, class T, class U, class V>
-inline V evaluate_rational(const T(&a)[N], const U(&b)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
+template <boost::math::size_t N, class T, class U, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational(const T(&a)[N], const U(&b)[N], const V& z) BOOST_MATH_NOEXCEPT(V)
 {
-   return detail::evaluate_rational_c_imp(a, b, z, static_cast<const std::integral_constant<int, N>*>(nullptr));
+   return detail::evaluate_rational_c_imp(a, b, z, static_cast<const boost::math::integral_constant<int, N>*>(nullptr));
 }
 
-template <std::size_t N, class T, class U, class V>
-inline V evaluate_rational(const std::array<T, N>& a, const std::array<U, N>& b, const V& z) BOOST_MATH_NOEXCEPT(V)
+#ifndef BOOST_MATH_HAS_NVRTC
+template <std::size_t N, class T, class U, class V>
+BOOST_MATH_GPU_ENABLED inline V evaluate_rational(const std::array<T, N>& a, const std::array<U, N>& b, const V& z) BOOST_MATH_NOEXCEPT(V)
 {
-   return detail::evaluate_rational_c_imp(a.data(), b.data(), z, static_cast<const std::integral_constant<int, N>*>(nullptr));
+   return detail::evaluate_rational_c_imp(a.data(), b.data(), z, static_cast<const boost::math::integral_constant<int, N>*>(nullptr));
 }
+#endif
 
 } // namespace tools
 } // namespace math
diff --git a/include/boost/math/tools/roots.hpp b/include/boost/math/tools/roots.hpp
index 97e67fae95..b0b0fc246c 100644
--- a/include/boost/math/tools/roots.hpp
+++ b/include/boost/math/tools/roots.hpp
@@ -1,4 +1,5 @@
 // (C) Copyright John Maddock 2006.
+// (C) Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -9,20 +10,21 @@ #ifdef _MSC_VER #pragma once #endif -#include // test for multiprecision types in complex Newton - -#include -#include -#include -#include #include -#include - +#include // test for multiprecision types in complex Newton +#include +#include +#include +#include #include +#include +#include + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT #include #include -#include +#endif namespace boost { namespace math { @@ -33,11 +35,11 @@ namespace detail { namespace dummy { template - typename T::value_type get(const T&) BOOST_MATH_NOEXCEPT(T); + BOOST_MATH_GPU_ENABLED typename T::value_type get(const T&) BOOST_MATH_NOEXCEPT(T); } template -void unpack_tuple(const Tuple& t, T& a, T& b) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED void unpack_tuple(const Tuple& t, T& a, T& b) BOOST_MATH_NOEXCEPT(T) { using dummy::get; // Use ADL to find the right overload for get: @@ -45,7 +47,7 @@ void unpack_tuple(const Tuple& t, T& a, T& b) BOOST_MATH_NOEXCEPT(T) b = get<1>(t); } template -void unpack_tuple(const Tuple& t, T& a, T& b, T& c) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED void unpack_tuple(const Tuple& t, T& a, T& b, T& c) BOOST_MATH_NOEXCEPT(T) { using dummy::get; // Use ADL to find the right overload for get: @@ -55,7 +57,7 @@ void unpack_tuple(const Tuple& t, T& a, T& b, T& c) BOOST_MATH_NOEXCEPT(T) } template -inline void unpack_0(const Tuple& t, T& val) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED inline void unpack_0(const Tuple& t, T& val) BOOST_MATH_NOEXCEPT(T) { using dummy::get; // Rely on ADL to find the correct overload of get: @@ -63,26 +65,30 @@ inline void unpack_0(const Tuple& t, T& val) BOOST_MATH_NOEXCEPT(T) } template -inline void unpack_tuple(const std::pair& p, V& a, V& b) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED inline void unpack_tuple(const boost::math::pair& p, V& a, V& b) BOOST_MATH_NOEXCEPT(T) { a = p.first; b = p.second; } template -inline void unpack_0(const std::pair& p, V& a) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED inline void unpack_0(const boost::math::pair& p, V& a) BOOST_MATH_NOEXCEPT(T) { a = p.first; } template -void handle_zero_derivative(F f, +BOOST_MATH_GPU_ENABLED void handle_zero_derivative(F f, T& last_f0, const T& f0, T& delta, T& result, T& guess, const T& min, - const T& max) noexcept(BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) + const T& max) noexcept(BOOST_MATH_IS_FLOAT(T) + #ifndef BOOST_MATH_HAS_GPU_SUPPORT + && noexcept(std::declval()(std::declval())) + #endif + ) { if (last_f0 == 0) { @@ -128,25 +134,29 @@ void handle_zero_derivative(F f, } // namespace template -std::pair bisect(F f, T min, T max, Tol tol, std::uintmax_t& max_iter, const Policy& pol) noexcept(policies::is_noexcept_error_policy::value&& BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) +BOOST_MATH_GPU_ENABLED boost::math::pair bisect(F f, T min, T max, Tol tol, boost::math::uintmax_t& max_iter, const Policy& pol) noexcept(policies::is_noexcept_error_policy::value && BOOST_MATH_IS_FLOAT(T) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()(std::declval())) +#endif +) { T fmin = f(min); T fmax = f(max); if (fmin == 0) { max_iter = 2; - return std::make_pair(min, min); + return boost::math::make_pair(min, min); } if (fmax == 0) { max_iter = 2; - return std::make_pair(max, max); + return boost::math::make_pair(max, max); } // // Error checking: // - static const char* function = "boost::math::tools::bisect<%1%>"; + 
constexpr auto function = "boost::math::tools::bisect<%1%>"; if (min >= max) { return boost::math::detail::pair_from_single(policies::raise_evaluation_error(function, @@ -196,29 +206,41 @@ std::pair bisect(F f, T min, T max, Tol tol, std::uintmax_t& max_iter, con std::cout << "Bisection required " << max_iter << " iterations.\n"; #endif - return std::make_pair(min, max); + return boost::math::make_pair(min, max); } template -inline std::pair bisect(F f, T min, T max, Tol tol, std::uintmax_t& max_iter) noexcept(policies::is_noexcept_error_policy >::value&& BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) +BOOST_MATH_GPU_ENABLED inline boost::math::pair bisect(F f, T min, T max, Tol tol, boost::math::uintmax_t& max_iter) noexcept(policies::is_noexcept_error_policy >::value && BOOST_MATH_IS_FLOAT(T) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()(std::declval())) +#endif +) { return bisect(f, min, max, tol, max_iter, policies::policy<>()); } template -inline std::pair bisect(F f, T min, T max, Tol tol) noexcept(policies::is_noexcept_error_policy >::value&& BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) +BOOST_MATH_GPU_ENABLED inline boost::math::pair bisect(F f, T min, T max, Tol tol) noexcept(policies::is_noexcept_error_policy >::value && BOOST_MATH_IS_FLOAT(T) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()(std::declval())) +#endif +) { - std::uintmax_t m = (std::numeric_limits::max)(); + boost::math::uintmax_t m = (boost::math::numeric_limits::max)(); return bisect(f, min, max, tol, m, policies::policy<>()); } template -T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& max_iter) noexcept(policies::is_noexcept_error_policy >::value&& BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) +BOOST_MATH_GPU_ENABLED T newton_raphson_iterate(F f, T guess, T min, T max, int digits, boost::math::uintmax_t& max_iter) noexcept(policies::is_noexcept_error_policy >::value && BOOST_MATH_IS_FLOAT(T) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()(std::declval())) +#endif +) { BOOST_MATH_STD_USING - static const char* function = "boost::math::tools::newton_raphson_iterate<%1%>"; + constexpr auto function = "boost::math::tools::newton_raphson_iterate<%1%>"; if (min > max) { return policies::raise_evaluation_error(function, "Range arguments in wrong order in boost::math::tools::newton_raphson_iterate(first arg=%1%)", min, boost::math::policies::policy<>()); @@ -245,7 +267,7 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& T max_range_f = 0; T min_range_f = 0; - std::uintmax_t count(max_iter); + boost::math::uintmax_t count(max_iter); #ifdef BOOST_MATH_INSTRUMENT std::cout << "Newton_raphson_iterate, guess = " << guess << ", min = " << min << ", max = " << max @@ -332,12 +354,22 @@ T newton_raphson_iterate(F f, T guess, T min, T max, int digits, std::uintmax_t& } template -inline T newton_raphson_iterate(F f, T guess, T min, T max, int digits) noexcept(policies::is_noexcept_error_policy >::value&& BOOST_MATH_IS_FLOAT(T) && noexcept(std::declval()(std::declval()))) +BOOST_MATH_GPU_ENABLED inline T newton_raphson_iterate(F f, T guess, T min, T max, int digits) noexcept(policies::is_noexcept_error_policy >::value && BOOST_MATH_IS_FLOAT(T) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()(std::declval())) +#endif +) { - std::uintmax_t m = (std::numeric_limits::max)(); + boost::math::uintmax_t m = (boost::math::numeric_limits::max)(); 
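+ // This overload simply delegates to the iteration-counting version with an
+ // effectively unlimited budget. A minimal illustrative use of the now
+ // GPU-enabled signature (cbrt_functor_deriv and the literals below are
+ // made-up examples, not part of this patch): the functor returns f(x) and
+ // f'(x) as a boost::math::pair, which unpack_tuple above knows how to read.
+ //
+ //   struct cbrt_functor_deriv
+ //   {
+ //      BOOST_MATH_GPU_ENABLED explicit cbrt_functor_deriv(double target) : a(target) {}
+ //      BOOST_MATH_GPU_ENABLED boost::math::pair<double, double> operator()(double x) const
+ //      {
+ //         return boost::math::make_pair(x * x * x - a, 3 * x * x); // f(x), f'(x)
+ //      }
+ //      double a;
+ //   };
+ //
+ //   // double r = boost::math::tools::newton_raphson_iterate(
+ //   //    cbrt_functor_deriv(27.0), 2.0, 0.0, 27.0, 40); // converges to r ~= 3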
return newton_raphson_iterate(f, guess, min, max, digits, m); } +// TODO(mborland): Disabled for now +// Recursion needs to be removed, but there is no demand at this time +#ifdef BOOST_MATH_HAS_NVRTC +}}} // Namespaces +#else + namespace detail { struct halley_step @@ -1025,4 +1057,6 @@ inline std::pair::type, typename tools: } // namespace math } // namespace boost +#endif // BOOST_MATH_HAS_NVRTC + #endif // BOOST_MATH_TOOLS_NEWTON_SOLVER_HPP diff --git a/include/boost/math/tools/series.hpp b/include/boost/math/tools/series.hpp index a4822fea43..4617ea3df7 100644 --- a/include/boost/math/tools/series.hpp +++ b/include/boost/math/tools/series.hpp @@ -10,10 +10,11 @@ #pragma once #endif -#include -#include -#include + #include +#include +#include +#include namespace boost{ namespace math{ namespace tools{ @@ -21,13 +22,17 @@ namespace boost{ namespace math{ namespace tools{ // Simple series summation come first: // template -inline typename Functor::result_type sum_series(Functor& func, const U& factor, std::uintmax_t& max_terms, const V& init_value) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type sum_series(Functor& func, const U& factor, boost::math::uintmax_t& max_terms, const V& init_value) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; - std::uintmax_t counter = max_terms; + boost::math::uintmax_t counter = max_terms; result_type result = init_value; result_type next_term; @@ -44,14 +49,22 @@ inline typename Functor::result_type sum_series(Functor& func, const U& factor, } template -inline typename Functor::result_type sum_series(Functor& func, const U& factor, std::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type sum_series(Functor& func, const U& factor, boost::math::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { typename Functor::result_type init_value = 0; return sum_series(func, factor, max_terms, init_value); } template -inline typename Functor::result_type sum_series(Functor& func, int bits, std::uintmax_t& max_terms, const U& init_value) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type sum_series(Functor& func, int bits, boost::math::uintmax_t& max_terms, const U& init_value) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; @@ -60,17 +73,25 @@ inline typename Functor::result_type sum_series(Functor& func, int bits, std::ui } template -inline typename Functor::result_type sum_series(Functor& func, int bits) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type sum_series(Functor& func, int bits) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; - std::uintmax_t 
iters = (std::numeric_limits::max)(); + boost::math::uintmax_t iters = (boost::math::numeric_limits::max)(); result_type init_val = 0; return sum_series(func, bits, iters, init_val); } template -inline typename Functor::result_type sum_series(Functor& func, int bits, std::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type sum_series(Functor& func, int bits, boost::math::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; @@ -79,23 +100,31 @@ inline typename Functor::result_type sum_series(Functor& func, int bits, std::ui } template -inline typename Functor::result_type sum_series(Functor& func, int bits, const U& init_value) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type sum_series(Functor& func, int bits, const U& init_value) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING - std::uintmax_t iters = (std::numeric_limits::max)(); + boost::math::uintmax_t iters = (boost::math::numeric_limits::max)(); return sum_series(func, bits, iters, init_value); } // // Checked summation: // template -inline typename Functor::result_type checked_sum_series(Functor& func, const U& factor, std::uintmax_t& max_terms, const V& init_value, V& norm) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type checked_sum_series(Functor& func, const U& factor, boost::math::uintmax_t& max_terms, const V& init_value, V& norm) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; - std::uintmax_t counter = max_terms; + boost::math::uintmax_t counter = max_terms; result_type result = init_value; result_type next_term; @@ -125,7 +154,11 @@ inline typename Functor::result_type checked_sum_series(Functor& func, const U& // in any case the result is still much better than a naive summation. 
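+ // An illustrative sketch of the compensated (Kahan) update the routine below
+ // performs; the variable names here are explanatory only and need not match
+ // the implementation:
+ //
+ //   result_type carry = 0;                // accumulated rounding error
+ //   while (/* tolerance not yet met */)
+ //   {
+ //      result_type y = func() - carry;    // correct the next term
+ //      result_type t = result + y;        // low-order bits of y may be lost here
+ //      carry = (t - result) - y;          // recover exactly what was lost
+ //      result = t;
+ //   }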
// template -inline typename Functor::result_type kahan_sum_series(Functor& func, int bits) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type kahan_sum_series(Functor& func, int bits) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING @@ -148,13 +181,17 @@ inline typename Functor::result_type kahan_sum_series(Functor& func, int bits) n } template -inline typename Functor::result_type kahan_sum_series(Functor& func, int bits, std::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) && noexcept(std::declval()())) +BOOST_MATH_GPU_ENABLED inline typename Functor::result_type kahan_sum_series(Functor& func, int bits, boost::math::uintmax_t& max_terms) noexcept(BOOST_MATH_IS_FLOAT(typename Functor::result_type) +#ifndef BOOST_MATH_HAS_GPU_SUPPORT +&& noexcept(std::declval()()) +#endif +) { BOOST_MATH_STD_USING typedef typename Functor::result_type result_type; - std::uintmax_t counter = max_terms; + boost::math::uintmax_t counter = max_terms; result_type factor = ldexp(result_type(1), bits); result_type result = func(); diff --git a/include/boost/math/tools/toms748_solve.hpp b/include/boost/math/tools/toms748_solve.hpp index ea93713224..dee2346853 100644 --- a/include/boost/math/tools/toms748_solve.hpp +++ b/include/boost/math/tools/toms748_solve.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -10,13 +11,13 @@ #pragma once #endif +#include #include +#include +#include +#include #include -#include #include -#include -#include -#include #ifdef BOOST_MATH_LOG_ROOT_ITERATIONS # define BOOST_MATH_LOGGER_INCLUDE @@ -32,29 +33,36 @@ template class eps_tolerance { public: - eps_tolerance() : eps(4 * tools::epsilon()) + BOOST_MATH_GPU_ENABLED eps_tolerance() : eps(4 * tools::epsilon()) { } - eps_tolerance(unsigned bits) + BOOST_MATH_GPU_ENABLED eps_tolerance(unsigned bits) { BOOST_MATH_STD_USING - eps = (std::max)(T(ldexp(1.0F, 1-bits)), T(4 * tools::epsilon())); + eps = BOOST_MATH_GPU_SAFE_MAX(T(ldexp(1.0F, 1-bits)), T(4 * tools::epsilon())); } - bool operator()(const T& a, const T& b) + BOOST_MATH_GPU_ENABLED bool operator()(const T& a, const T& b) { BOOST_MATH_STD_USING - return fabs(a - b) <= (eps * (std::min)(fabs(a), fabs(b))); + return fabs(a - b) <= (eps * BOOST_MATH_GPU_SAFE_MIN(fabs(a), fabs(b))); } private: T eps; }; +// CUDA warns about __host__ __device__ marker on defaulted constructor +// but the warning is benign +#ifdef BOOST_MATH_ENABLE_CUDA +# pragma nv_diag_suppress 20012 +#endif + struct equal_floor { - equal_floor()= default; + BOOST_MATH_GPU_ENABLED equal_floor() = default; + template - bool operator()(const T& a, const T& b) + BOOST_MATH_GPU_ENABLED bool operator()(const T& a, const T& b) { BOOST_MATH_STD_USING return (floor(a) == floor(b)) || (fabs((b-a)/b) < boost::math::tools::epsilon() * 2); @@ -63,9 +71,10 @@ struct equal_floor struct equal_ceil { - equal_ceil()= default; + BOOST_MATH_GPU_ENABLED equal_ceil() = default; + template - bool operator()(const T& a, const T& b) + BOOST_MATH_GPU_ENABLED bool operator()(const T& a, const T& b) { BOOST_MATH_STD_USING return (ceil(a) == ceil(b)) || (fabs((b - a) / b) 
< boost::math::tools::epsilon() * 2); @@ -74,19 +83,24 @@ struct equal_ceil struct equal_nearest_integer { - equal_nearest_integer()= default; + BOOST_MATH_GPU_ENABLED equal_nearest_integer() = default; + template - bool operator()(const T& a, const T& b) + BOOST_MATH_GPU_ENABLED bool operator()(const T& a, const T& b) { BOOST_MATH_STD_USING return (floor(a + 0.5f) == floor(b + 0.5f)) || (fabs((b - a) / b) < boost::math::tools::epsilon() * 2); } }; +#ifdef BOOST_MATH_ENABLE_CUDA +# pragma nv_diag_default 20012 +#endif + namespace detail{ template -void bracket(F f, T& a, T& b, T c, T& fa, T& fb, T& d, T& fd) +BOOST_MATH_GPU_ENABLED void bracket(F f, T& a, T& b, T c, T& fa, T& fb, T& d, T& fd) { // // Given a point c inside the existing enclosing interval @@ -150,7 +164,7 @@ void bracket(F f, T& a, T& b, T c, T& fa, T& fb, T& d, T& fd) } template -inline T safe_div(T num, T denom, T r) +BOOST_MATH_GPU_ENABLED inline T safe_div(T num, T denom, T r) { // // return num / denom without overflow, @@ -167,7 +181,7 @@ inline T safe_div(T num, T denom, T r) } template -inline T secant_interpolate(const T& a, const T& b, const T& fa, const T& fb) +BOOST_MATH_GPU_ENABLED inline T secant_interpolate(const T& a, const T& b, const T& fa, const T& fb) { // // Performs standard secant interpolation of [a,b] given @@ -188,9 +202,9 @@ inline T secant_interpolate(const T& a, const T& b, const T& fa, const T& fb) } template -T quadratic_interpolate(const T& a, const T& b, T const& d, - const T& fa, const T& fb, T const& fd, - unsigned count) +BOOST_MATH_GPU_ENABLED T quadratic_interpolate(const T& a, const T& b, T const& d, + const T& fa, const T& fb, T const& fd, + unsigned count) { // // Performs quadratic interpolation to determine the next point, @@ -244,9 +258,9 @@ T quadratic_interpolate(const T& a, const T& b, T const& d, } template -T cubic_interpolate(const T& a, const T& b, const T& d, - const T& e, const T& fa, const T& fb, - const T& fd, const T& fe) +BOOST_MATH_GPU_ENABLED T cubic_interpolate(const T& a, const T& b, const T& d, + const T& e, const T& fa, const T& fb, + const T& fd, const T& fe) { // // Uses inverse cubic interpolation of f(x) at points @@ -293,7 +307,7 @@ T cubic_interpolate(const T& a, const T& b, const T& d, } // namespace detail template -std::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const T& fbx, Tol tol, std::uintmax_t& max_iter, const Policy& pol) +BOOST_MATH_GPU_ENABLED boost::math::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const T& fbx, Tol tol, boost::math::uintmax_t& max_iter, const Policy& pol) { // // Main entry point and logic for Toms Algorithm 748 @@ -301,15 +315,15 @@ std::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const // BOOST_MATH_STD_USING // For ADL of std math functions - static const char* function = "boost::math::tools::toms748_solve<%1%>"; + constexpr auto function = "boost::math::tools::toms748_solve<%1%>"; // // Sanity check - are we allowed to iterate at all? 
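 // A minimal illustrative call (not part of this patch; the functor, bracket,
 // and tolerance below are made up -- f must change sign over [ax, bx]):
 //
 //   struct f_sqrt2
 //   {
 //      BOOST_MATH_GPU_ENABLED double operator()(double x) const { return x * x - 2; }
 //   };
 //
 //   boost::math::uintmax_t iters = 50;
 //   boost::math::tools::eps_tolerance<double> tol(52);
 //   boost::math::pair<double, double> r = boost::math::tools::toms748_solve(
 //      f_sqrt2(), 1.0, 2.0, tol, iters, boost::math::policies::policy<>());
 //   // On return, sqrt(2) lies in [r.first, r.second] and iters holds the count used.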
// if (max_iter == 0) - return std::make_pair(ax, bx); + return boost::math::make_pair(ax, bx); - std::uintmax_t count = max_iter; + boost::math::uintmax_t count = max_iter; T a, b, fa, fb, c, u, fu, a0, b0, d, fd, e, fe; static const T mu = 0.5f; @@ -330,7 +344,7 @@ std::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const b = a; else if(fb == 0) a = b; - return std::make_pair(a, b); + return boost::math::make_pair(a, b); } if(boost::math::sign(fa) * boost::math::sign(fb) > 0) @@ -472,37 +486,37 @@ std::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const a = b; } BOOST_MATH_LOG_COUNT(max_iter) - return std::make_pair(a, b); + return boost::math::make_pair(a, b); } template -inline std::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const T& fbx, Tol tol, std::uintmax_t& max_iter) +BOOST_MATH_GPU_ENABLED inline boost::math::pair toms748_solve(F f, const T& ax, const T& bx, const T& fax, const T& fbx, Tol tol, boost::math::uintmax_t& max_iter) { return toms748_solve(f, ax, bx, fax, fbx, tol, max_iter, policies::policy<>()); } template -inline std::pair toms748_solve(F f, const T& ax, const T& bx, Tol tol, std::uintmax_t& max_iter, const Policy& pol) +BOOST_MATH_GPU_ENABLED inline boost::math::pair toms748_solve(F f, const T& ax, const T& bx, Tol tol, boost::math::uintmax_t& max_iter, const Policy& pol) { if (max_iter <= 2) - return std::make_pair(ax, bx); + return boost::math::make_pair(ax, bx); max_iter -= 2; - std::pair r = toms748_solve(f, ax, bx, f(ax), f(bx), tol, max_iter, pol); + boost::math::pair r = toms748_solve(f, ax, bx, f(ax), f(bx), tol, max_iter, pol); max_iter += 2; return r; } template -inline std::pair toms748_solve(F f, const T& ax, const T& bx, Tol tol, std::uintmax_t& max_iter) +BOOST_MATH_GPU_ENABLED inline boost::math::pair toms748_solve(F f, const T& ax, const T& bx, Tol tol, boost::math::uintmax_t& max_iter) { return toms748_solve(f, ax, bx, tol, max_iter, policies::policy<>()); } template -std::pair bracket_and_solve_root(F f, const T& guess, T factor, bool rising, Tol tol, std::uintmax_t& max_iter, const Policy& pol) +BOOST_MATH_GPU_ENABLED boost::math::pair bracket_and_solve_root(F f, const T& guess, T factor, bool rising, Tol tol, boost::math::uintmax_t& max_iter, const Policy& pol) { BOOST_MATH_STD_USING - static const char* function = "boost::math::tools::bracket_and_solve_root<%1%>"; + constexpr auto function = "boost::math::tools::bracket_and_solve_root<%1%>"; // // Set up initial brackets: // @@ -513,7 +527,7 @@ std::pair bracket_and_solve_root(F f, const T& guess, T factor, bool risin // // Set up invocation count: // - std::uintmax_t count = max_iter - 1; + boost::math::uintmax_t count = max_iter - 1; int step = 32; @@ -563,7 +577,7 @@ std::pair bracket_and_solve_root(F f, const T& guess, T factor, bool risin // Escape route just in case the answer is zero! max_iter -= count; max_iter += 1; - return a > 0 ? std::make_pair(T(0), T(a)) : std::make_pair(T(a), T(0)); + return a > 0 ? boost::math::make_pair(T(0), T(a)) : boost::math::make_pair(T(a), T(0)); } if(count == 0) return boost::math::detail::pair_from_single(policies::raise_evaluation_error(function, "Unable to bracket root, last nearest value was %1%", a, pol)); @@ -592,7 +606,7 @@ std::pair bracket_and_solve_root(F f, const T& guess, T factor, bool risin } max_iter -= count; max_iter += 1; - std::pair r = toms748_solve( + boost::math::pair r = toms748_solve( f, (a < 0 ? b : a), (a < 0 ? 
a : b), @@ -608,7 +622,7 @@ std::pair bracket_and_solve_root(F f, const T& guess, T factor, bool risin } template -inline std::pair bracket_and_solve_root(F f, const T& guess, const T& factor, bool rising, Tol tol, std::uintmax_t& max_iter) +BOOST_MATH_GPU_ENABLED inline boost::math::pair bracket_and_solve_root(F f, const T& guess, const T& factor, bool rising, Tol tol, boost::math::uintmax_t& max_iter) { return bracket_and_solve_root(f, guess, factor, rising, tol, max_iter, policies::policy<>()); } diff --git a/include/boost/math/tools/tuple.hpp b/include/boost/math/tools/tuple.hpp index b5e42fc59e..dcc763e37a 100644 --- a/include/boost/math/tools/tuple.hpp +++ b/include/boost/math/tools/tuple.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2010. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -6,12 +7,65 @@ #ifndef BOOST_MATH_TUPLE_HPP_INCLUDED #define BOOST_MATH_TUPLE_HPP_INCLUDED -#include +#include + +#ifdef BOOST_MATH_ENABLE_CUDA + +#include +#include +#include + +namespace boost { +namespace math { + +using cuda::std::pair; +using cuda::std::tuple; + +using cuda::std::make_pair; + +using cuda::std::tie; +using cuda::std::get; + +using cuda::std::tuple_size; +using cuda::std::tuple_element; + +namespace detail { + +template +BOOST_MATH_GPU_ENABLED T&& forward(boost::math::remove_reference_t& arg) noexcept +{ + return static_cast(arg); +} + +template +BOOST_MATH_GPU_ENABLED T&& forward(boost::math::remove_reference_t&& arg) noexcept +{ + static_assert(!boost::math::is_lvalue_reference::value, "Cannot forward an rvalue as an lvalue."); + return static_cast(arg); +} + +} // namespace detail + +template +BOOST_MATH_GPU_ENABLED auto make_tuple(T&& t, Ts&&... ts) +{ + return cuda::std::tuple, boost::math::decay_t...>( + boost::math::detail::forward(t), boost::math::detail::forward(ts)... + ); +} + +} // namespace math +} // namespace boost + +#else + #include -namespace boost{ namespace math{ +namespace boost { +namespace math { using ::std::tuple; +using ::std::pair; // [6.1.3.2] Tuple creation functions using ::std::ignore; @@ -23,5 +77,12 @@ using ::std::get; using ::std::tuple_size; using ::std::tuple_element; -}} +// Pair helpers +using ::std::make_pair; + +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_ENABLE_CUDA + #endif diff --git a/include/boost/math/tools/type_traits.hpp b/include/boost/math/tools/type_traits.hpp new file mode 100644 index 0000000000..a13332797b --- /dev/null +++ b/include/boost/math/tools/type_traits.hpp @@ -0,0 +1,494 @@ +// Copyright (c) 2024 Matt Borland +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Regular use of <type_traits> is not compatible with CUDA +// Adds aliases to unify the support +// Also adds convenience overloads like is_same_v so we don't have to wait for C++17 + +#ifndef BOOST_MATH_TOOLS_TYPE_TRAITS +#define BOOST_MATH_TOOLS_TYPE_TRAITS + +#include + +#ifdef BOOST_MATH_ENABLE_CUDA + +#include + +namespace boost { +namespace math { + +// Helper classes +using cuda::std::integral_constant; +using cuda::std::true_type; +using cuda::std::false_type; + +// Primary type categories +using cuda::std::is_void; +using cuda::std::is_null_pointer; +using cuda::std::is_integral; +using cuda::std::is_floating_point; +using cuda::std::is_array; +using cuda::std::is_enum; +using cuda::std::is_union; +using cuda::std::is_class; +using cuda::std::is_function; +using cuda::std::is_pointer; +using cuda::std::is_lvalue_reference; +using cuda::std::is_rvalue_reference; +using cuda::std::is_member_object_pointer; +using cuda::std::is_member_function_pointer; + +// Composite Type Categories +using cuda::std::is_fundamental; +using cuda::std::is_arithmetic; +using cuda::std::is_scalar; +using cuda::std::is_object; +using cuda::std::is_compound; +using cuda::std::is_reference; +using cuda::std::is_member_pointer; + +// Type properties +using cuda::std::is_const; +using cuda::std::is_volatile; +using cuda::std::is_trivial; +using cuda::std::is_trivially_copyable; +using cuda::std::is_standard_layout; +using cuda::std::is_empty; +using cuda::std::is_polymorphic; +using cuda::std::is_abstract; +using cuda::std::is_final; +using cuda::std::is_signed; +using cuda::std::is_unsigned; + +// Supported Operations +using cuda::std::is_constructible; +using cuda::std::is_trivially_constructible; +using cuda::std::is_nothrow_constructible; + +using cuda::std::is_default_constructible; +using cuda::std::is_trivially_default_constructible; +using cuda::std::is_nothrow_default_constructible; + +using cuda::std::is_copy_constructible; +using cuda::std::is_trivially_copy_constructible; +using cuda::std::is_nothrow_copy_constructible; + +using cuda::std::is_move_constructible; +using cuda::std::is_trivially_move_constructible; +using cuda::std::is_nothrow_move_constructible; + +using cuda::std::is_assignable; +using cuda::std::is_trivially_assignable; +using cuda::std::is_nothrow_assignable; + +using cuda::std::is_copy_assignable; +using cuda::std::is_trivially_copy_assignable; +using cuda::std::is_nothrow_copy_assignable; + +using cuda::std::is_move_assignable; +using cuda::std::is_trivially_move_assignable; +using cuda::std::is_nothrow_move_assignable; + +using cuda::std::is_destructible; +using cuda::std::is_trivially_destructible; +using cuda::std::is_nothrow_destructible; + +using cuda::std::has_virtual_destructor; + +// Property Queries +using cuda::std::alignment_of; +using cuda::std::rank; +using cuda::std::extent; + +// Type Relationships +using cuda::std::is_same; +using cuda::std::is_base_of; +using cuda::std::is_convertible; + +// Const-volatility specifiers +using cuda::std::remove_cv; +using cuda::std::remove_cv_t; +using cuda::std::remove_const; +using cuda::std::remove_const_t; +using cuda::std::remove_volatile; +using cuda::std::remove_volatile_t; +using cuda::std::add_cv; +using cuda::std::add_cv_t; +using cuda::std::add_const; +using cuda::std::add_const_t; +using cuda::std::add_volatile; +using cuda::std::add_volatile_t; + +// References +using cuda::std::remove_reference; +using cuda::std::remove_reference_t;
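+// Example of what this unified namespace buys us (illustrative only;
+// clamp_positive is hypothetical and not part of this patch): the same
+// template compiles on the host against std:: and on the device against
+// cuda::std::, with no #ifdef at the point of use.
+//
+//   template <typename T>
+//   BOOST_MATH_GPU_ENABLED boost::math::enable_if_t<boost::math::is_floating_point<T>::value, T>
+//   clamp_positive(T x)
+//   {
+//      return x < T(0) ? T(0) : x;
+//   }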
+using cuda::std::add_lvalue_reference; +using cuda::std::add_lvalue_reference_t; +using cuda::std::add_rvalue_reference; +using cuda::std::add_rvalue_reference_t; + +// Pointers +using cuda::std::remove_pointer; +using cuda::std::remove_pointer_t; +using cuda::std::add_pointer; +using cuda::std::add_pointer_t; + +// Sign Modifiers +using cuda::std::make_signed; +using cuda::std::make_signed_t; +using cuda::std::make_unsigned; +using cuda::std::make_unsigned_t; + +// Arrays +using cuda::std::remove_extent; +using cuda::std::remove_extent_t; +using cuda::std::remove_all_extents; +using cuda::std::remove_all_extents_t; + +// Misc transformations +using cuda::std::decay; +using cuda::std::decay_t; +using cuda::std::enable_if; +using cuda::std::enable_if_t; +using cuda::std::conditional; +using cuda::std::conditional_t; +using cuda::std::common_type; +using cuda::std::common_type_t; +using cuda::std::underlying_type; +using cuda::std::underlying_type_t; + +#else // STD versions + +#include + +namespace boost { +namespace math { + +// Helper classes +using std::integral_constant; +using std::true_type; +using std::false_type; + +// Primary type categories +using std::is_void; +using std::is_null_pointer; +using std::is_integral; +using std::is_floating_point; +using std::is_array; +using std::is_enum; +using std::is_union; +using std::is_class; +using std::is_function; +using std::is_pointer; +using std::is_lvalue_reference; +using std::is_rvalue_reference; +using std::is_member_object_pointer; +using std::is_member_function_pointer; + +// Composite Type Categories +using std::is_fundamental; +using std::is_arithmetic; +using std::is_scalar; +using std::is_object; +using std::is_compound; +using std::is_reference; +using std::is_member_pointer; + +// Type properties +using std::is_const; +using std::is_volatile; +using std::is_trivial; +using std::is_trivially_copyable; +using std::is_standard_layout; +using std::is_empty; +using std::is_polymorphic; +using std::is_abstract; +using std::is_final; +using std::is_signed; +using std::is_unsigned; + +// Supported Operations +using std::is_constructible; +using std::is_trivially_constructible; +using std::is_nothrow_constructible; + +using std::is_default_constructible; +using std::is_trivially_default_constructible; +using std::is_nothrow_default_constructible; + +using std::is_copy_constructible; +using std::is_trivially_copy_constructible; +using std::is_nothrow_copy_constructible; + +using std::is_move_constructible; +using std::is_trivially_move_constructible; +using std::is_nothrow_move_constructible; + +using std::is_assignable; +using std::is_trivially_assignable; +using std::is_nothrow_assignable; + +using std::is_copy_assignable; +using std::is_trivially_copy_assignable; +using std::is_nothrow_copy_assignable; + +using std::is_move_assignable; +using std::is_trivially_move_assignable; +using std::is_nothrow_move_assignable; + +using std::is_destructible; +using std::is_trivially_destructible; +using std::is_nothrow_destructible; + +using std::has_virtual_destructor; + +// Property Queries +using std::alignment_of; +using std::rank; +using std::extent; + +// Type Relationships +using std::is_same; +using std::is_base_of; +using std::is_convertible; + +// Const-volatility specifiers +using std::remove_cv; +using std::remove_cv_t; +using std::remove_const; +using std::remove_const_t; +using std::remove_volatile; +using std::remove_volatile_t; +using std::add_cv; +using std::add_cv_t; +using std::add_const; +using std::add_const_t; +using 
std::add_volatile; +using std::add_volatile_t; + +// References +using std::remove_reference; +using std::remove_reference_t; +using std::add_lvalue_reference; +using std::add_lvalue_reference_t; +using std::add_rvalue_reference; +using std::add_rvalue_reference_t; + +// Pointers +using std::remove_pointer; +using std::remove_pointer_t; +using std::add_pointer; +using std::add_pointer_t; + +// Sign Modifiers +using std::make_signed; +using std::make_signed_t; +using std::make_unsigned; +using std::make_unsigned_t; + +// Arrays +using std::remove_extent; +using std::remove_extent_t; +using std::remove_all_extents; +using std::remove_all_extents_t; + +// Misc transformations +using std::decay; +using std::decay_t; +using std::enable_if; +using std::enable_if_t; +using std::conditional; +using std::conditional_t; +using std::common_type; +using std::common_type_t; +using std::underlying_type; +using std::underlying_type_t; + +#endif + +template +using bool_constant = boost::math::integral_constant; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_void_v = boost::math::is_void::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_null_pointer_v = boost::math::is_null_pointer::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_integral_v = boost::math::is_integral::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_floating_point_v = boost::math::is_floating_point::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_array_v = boost::math::is_array::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_enum_v = boost::math::is_enum::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_union_v = boost::math::is_union::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_class_v = boost::math::is_class::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_function_v = boost::math::is_function::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_pointer_v = boost::math::is_pointer::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_lvalue_reference_v = boost::math::is_lvalue_reference::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_rvalue_reference_v = boost::math::is_rvalue_reference::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_member_object_pointer_v = boost::math::is_member_object_pointer::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_member_function_pointer_v = boost::math::is_member_function_pointer::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_fundamental_v = boost::math::is_fundamental::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_arithmetic_v = boost::math::is_arithmetic::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_scalar_v = boost::math::is_scalar::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_object_v = boost::math::is_object::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_compound_v = boost::math::is_compound::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_reference_v = boost::math::is_reference::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_member_pointer_v = boost::math::is_member_pointer::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_const_v = boost::math::is_const::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_volatile_v = boost::math::is_volatile::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivial_v = boost::math::is_trivial::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_copyable_v = boost::math::is_trivially_copyable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_standard_layout_v = 
boost::math::is_standard_layout::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_empty_v = boost::math::is_empty::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_polymorphic_v = boost::math::is_polymorphic::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_abstract_v = boost::math::is_abstract::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_final_v = boost::math::is_final::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_signed_v = boost::math::is_signed::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_unsigned_v = boost::math::is_unsigned::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_constructible_v = boost::math::is_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_constructible_v = boost::math::is_trivially_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_constructible_v = boost::math::is_nothrow_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_default_constructible_v = boost::math::is_default_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_default_constructible_v = boost::math::is_trivially_default_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_default_constructible_v = boost::math::is_nothrow_default_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_copy_constructible_v = boost::math::is_copy_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_copy_constructible_v = boost::math::is_trivially_copy_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_copy_constructible_v = boost::math::is_nothrow_copy_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_move_constructible_v = boost::math::is_move_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_move_constructible_v = boost::math::is_trivially_move_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_move_constructible_v = boost::math::is_nothrow_move_constructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_assignable_v = boost::math::is_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_assignable_v = boost::math::is_trivially_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_assignable_v = boost::math::is_nothrow_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_copy_assignable_v = boost::math::is_copy_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_copy_assignable_v = boost::math::is_trivially_copy_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_copy_assignable_v = boost::math::is_nothrow_copy_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_move_assignable_v = boost::math::is_move_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_move_assignable_v = boost::math::is_trivially_move_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_move_assignable_v = boost::math::is_nothrow_move_assignable::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_destructible_v = boost::math::is_destructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_trivially_destructible_v = boost::math::is_trivially_destructible::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_nothrow_destructible_v = boost::math::is_nothrow_destructible::value; + +template 
+BOOST_MATH_INLINE_CONSTEXPR bool has_virtual_destructor_v = boost::math::has_virtual_destructor::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_same_v = boost::math::is_same::value; + +template +BOOST_MATH_INLINE_CONSTEXPR bool is_base_of_v = boost::math::is_base_of::value; + +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_TOOLS_TYPE_TRAITS diff --git a/include/boost/math/tools/utility.hpp b/include/boost/math/tools/utility.hpp new file mode 100644 index 0000000000..3e22865780 --- /dev/null +++ b/include/boost/math/tools/utility.hpp @@ -0,0 +1,69 @@ +// Copyright (c) 2024 Matt Borland +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_UTILITY +#define BOOST_MATH_TOOLS_UTILITY + +#include + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT + +#include + +namespace boost { +namespace math { + +template +constexpr T min BOOST_MATH_PREVENT_MACRO_SUBSTITUTION (const T& a, const T& b) +{ + return (std::min)(a, b); +} + +template +constexpr T max BOOST_MATH_PREVENT_MACRO_SUBSTITUTION (const T& a, const T& b) +{ + return (std::max)(a, b); +} + +template +void swap BOOST_MATH_PREVENT_MACRO_SUBSTITUTION (T& a, T& b) +{ + return (std::swap)(a, b); +} + +} // namespace math +} // namespace boost + +#else + +namespace boost { +namespace math { + +template +BOOST_MATH_GPU_ENABLED constexpr T min BOOST_MATH_PREVENT_MACRO_SUBSTITUTION (const T& a, const T& b) +{ + return a < b ? a : b; +} + +template +BOOST_MATH_GPU_ENABLED constexpr T max BOOST_MATH_PREVENT_MACRO_SUBSTITUTION (const T& a, const T& b) +{ + return a > b ? a : b; +} + +template +BOOST_MATH_GPU_ENABLED constexpr void swap BOOST_MATH_PREVENT_MACRO_SUBSTITUTION (T& a, T& b) +{ + T t(a); + a = b; + b = t; +} + +} // namespace math +} // namespace boost + +#endif // BOOST_MATH_HAS_GPU_SUPPORT + +#endif // BOOST_MATH_TOOLS_UTILITY diff --git a/include/boost/math/tools/workaround.hpp b/include/boost/math/tools/workaround.hpp index 9b15c4e930..7edd1c12aa 100644 --- a/include/boost/math/tools/workaround.hpp +++ b/include/boost/math/tools/workaround.hpp @@ -23,7 +23,7 @@ namespace boost{ namespace math{ namespace tools{ // std::fmod(1185.0L, 1.5L); // template -inline T fmod_workaround(T a, T b) BOOST_MATH_NOEXCEPT(T) +BOOST_MATH_GPU_ENABLED inline T fmod_workaround(T a, T b) BOOST_MATH_NOEXCEPT(T) { BOOST_MATH_STD_USING return fmod(a, b); diff --git a/include_private/boost/math/tools/remez.hpp b/include_private/boost/math/tools/remez.hpp index 8f817d7ce3..3fdd473969 100644 --- a/include_private/boost/math/tools/remez.hpp +++ b/include_private/boost/math/tools/remez.hpp @@ -10,7 +10,7 @@ #pragma once #endif -#include +#include "solve.hpp" #include #include #include diff --git a/include_private/boost/math/tools/test.hpp b/include_private/boost/math/tools/test.hpp index 10f6143e2b..7547ef5be2 100644 --- a/include_private/boost/math/tools/test.hpp +++ b/include_private/boost/math/tools/test.hpp @@ -1,4 +1,5 @@ // (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -253,6 +254,7 @@ test_result test_hetero(const A& a, F1 test_func, F2 expect_func) return result; } +#ifndef BOOST_MATH_NO_EXCEPTIONS template void test_check_throw(Val, Exception) { @@ -293,6 +295,7 @@ void test_check_throw(Val v, boost::math::rounding_error const*) BOOST_CHECK((v == boost::math::tools::max_value()) || (v == -boost::math::tools::max_value())); } } +#endif } // namespace tools } // namespace math @@ -303,7 +306,9 @@ void test_check_throw(Val v, boost::math::rounding_error const*) // exception-free testing support, ideally we'd only define this in our tests, // but to keep things simple we really need it somewhere that's always included: // -#ifdef BOOST_NO_EXCEPTIONS +#if defined(BOOST_MATH_NO_EXCEPTIONS) && defined(BOOST_MATH_HAS_GPU_SUPPORT) +# define BOOST_MATH_CHECK_THROW(x, y) +#elif defined(BOOST_MATH_NO_EXCEPTIONS) # define BOOST_MATH_CHECK_THROW(x, ExceptionType) boost::math::tools::test_check_throw(x, static_cast(nullptr)); #else # define BOOST_MATH_CHECK_THROW(x, y) BOOST_CHECK_THROW(x, y) diff --git a/reporting/accuracy/Jamfile.v2 b/reporting/accuracy/Jamfile.v2 index a07e60083b..0d961e66c5 100644 --- a/reporting/accuracy/Jamfile.v2 +++ b/reporting/accuracy/Jamfile.v2 @@ -12,7 +12,8 @@ import testing ; import modules ; import path ; import pch ; -import ../../../config/checks/config : requires ; +import-search /boost/config/checks ; +import config : requires ; using quickbook ; using auto-index ; @@ -47,16 +48,16 @@ explicit has_gsl ; exe has_rmath : has_rmath.cpp Rmath ; explicit has_rmath ; -CEPHES_SOURCE = acosh.c airy.c asin.c asinh.c atan.c atanh.c bdtr.c beta.c -btdtr.c cbrt.c chbevl.c chdtr.c clog.c cmplx.c const.c -cosh.c dawsn.c drand.c ei.c ellie.c ellik.c ellpe.c ellpj.c ellpk.c -exp.c exp10.c exp2.c expn.c expx2.c fabs.c fac.c fdtr.c -fresnl.c gamma.c gdtr.c hyp2f1.c hyperg.c i0.c i1.c igami.c incbet.c -incbi.c igam.c isnan.c iv.c j0.c j1.c jn.c jv.c k0.c k1.c kn.c kolmogorov.c -log.c log2.c log10.c lrand.c nbdtr.c ndtr.c ndtri.c pdtr.c planck.c -polevl.c polmisc.c polylog.c polyn.c pow.c powi.c psi.c rgamma.c round.c -shichi.c sici.c sin.c sindg.c sinh.c spence.c stdtr.c struve.c -tan.c tandg.c tanh.c unity.c yn.c zeta.c zetac.c +CEPHES_SOURCE = acosh.c airy.c asin.c asinh.c atan.c atanh.c bdtr.c beta.c +btdtr.c cbrt.c chbevl.c chdtr.c clog.c cmplx.c const.c +cosh.c dawsn.c drand.c ei.c ellie.c ellik.c ellpe.c ellpj.c ellpk.c +exp.c exp10.c exp2.c expn.c expx2.c fabs.c fac.c fdtr.c +fresnl.c gamma.c gdtr.c hyp2f1.c hyperg.c i0.c i1.c igami.c incbet.c +incbi.c igam.c isnan.c iv.c j0.c j1.c jn.c jv.c k0.c k1.c kn.c kolmogorov.c +log.c log2.c log10.c lrand.c nbdtr.c ndtr.c ndtri.c pdtr.c planck.c +polevl.c polmisc.c polylog.c polyn.c pow.c powi.c psi.c rgamma.c round.c +shichi.c sici.c sin.c sindg.c sinh.c spence.c stdtr.c struve.c +tan.c tandg.c tanh.c unity.c yn.c zeta.c zetac.c sqrt.c floor.c setprec.c mtherr.c ; path-constant here : . 
; @@ -68,10 +69,10 @@ actions check_exists explicit $(here)/third_party/cephes_double/acosh.c ; lib cephes_double : $(here)/third_party/cephes_double/$(CEPHES_SOURCE) - : + : release static - [ check-target-builds $(here)/third_party/cephes_double/acosh.c : : no ] + [ check-target-builds $(here)/third_party/cephes_double/acosh.c : : no ] ; explicit cephes_double ; @@ -80,52 +81,52 @@ rule all-tests { local result ; for local source in [ glob test*.cpp ] { - result += [ run $(source) /boost/system /boost/filesystem /boost/test//boost_unit_test_framework - : : : - [ check-target-builds has_gsl : ALWAYS_TEST_DOUBLE : ] + result += [ run $(source) /boost/system//boost_system /boost/filesystem//boost_filesystem /boost/test//boost_unit_test_framework /boost/interprocess//boost_interprocess /boost/multiprecision//boost_multiprecision /boost/type_index//boost_type_index quadmath mpfr + : : : + [ check-target-builds has_gsl : ALWAYS_TEST_DOUBLE : ] linux:-lpthread linux:-lrt gcc:$(OTHERFLAGS) ] ; - result += [ run $(source) /boost/system /boost/filesystem /boost/test//boost_unit_test_framework - : : : [ check-target-builds has_cxx17_cmath : TEST_CXX17_CMATH : no ] + result += [ run $(source) /boost/system//boost_system /boost/filesystem//boost_filesystem /boost/test//boost_unit_test_framework /boost/interprocess//boost_interprocess /boost/multiprecision//boost_multiprecision /boost/type_index//boost_type_index quadmath mpfr + : : : [ check-target-builds has_cxx17_cmath : TEST_CXX17_CMATH : no ] linux:-lpthread linux:-lrt gcc:$(OTHERFLAGS) - : $(source:B)_cxx17_cmath ] + : $(source:B)_cxx17_cmath ] ; - result += [ run $(source) /boost/system /boost/filesystem /boost/test//boost_unit_test_framework - : : : [ check-target-builds has_c99_cmath : TEST_C99 : no ] + result += [ run $(source) /boost/system//boost_system /boost/filesystem//boost_filesystem /boost/test//boost_unit_test_framework /boost/interprocess//boost_interprocess /boost/multiprecision//boost_multiprecision /boost/type_index//boost_type_index quadmath mpfr + : : : [ check-target-builds has_c99_cmath : TEST_C99 : no ] linux:-lpthread linux:-lrt gcc:$(OTHERFLAGS) - : $(source:B)_c99 ] + : $(source:B)_c99 ] ; - result += [ run $(source) /boost/system /boost/filesystem /boost/test//boost_unit_test_framework gsl gslcblas - : : : [ check-target-builds has_gsl : TEST_GSL : no ] + result += [ run $(source) /boost/system//boost_system /boost/filesystem//boost_filesystem /boost/test//boost_unit_test_framework /boost/interprocess//boost_interprocess /boost/multiprecision//boost_multiprecision /boost/type_index//boost_type_index gsl gslcblas + : : : [ check-target-builds has_gsl : TEST_GSL : no ] linux:-lpthread linux:-lrt gcc:$(OTHERFLAGS) - : $(source:B)_gsl ] + : $(source:B)_gsl ] ; - result += [ run $(source) /boost/system /boost/filesystem /boost/test//boost_unit_test_framework Rmath - : : : [ check-target-builds has_rmath : TEST_RMATH : no ] + result += [ run $(source) /boost/system//boost_system /boost/filesystem//boost_filesystem /boost/test//boost_unit_test_framework /boost/interprocess//boost_interprocess /boost/multiprecision//boost_multiprecision /boost/type_index//boost_type_index Rmath + : : : [ check-target-builds has_rmath : TEST_RMATH : no ] linux:-lpthread linux:-lrt gcc:$(OTHERFLAGS) - : $(source:B)_rmath ] + : $(source:B)_rmath ] ; - result += [ run $(source) /boost/system /boost/filesystem /boost/test//boost_unit_test_framework cephes_double - : : : [ check-target-builds $(here)/third_party/cephes_double/acosh.c : TEST_CEPHES 
cephes_double : no ] + result += [ run $(source) /boost/system//boost_system /boost/filesystem//boost_filesystem /boost/test//boost_unit_test_framework /boost/interprocess//boost_interprocess /boost/multiprecision//boost_multiprecision /boost/type_index//boost_type_index cephes_double + : : : [ check-target-builds $(here)/third_party/cephes_double/acosh.c : TEST_CEPHES cephes_double : no ] linux:-lpthread linux:-lrt gcc:$(OTHERFLAGS) - : $(source:B)_cephes ] + : $(source:B)_cephes ] ; } - return $(result) ; + return $(result) ; } - + test-suite report_gen : [ all-tests ] ; path-constant images_location : html ; @@ -138,7 +139,7 @@ boostbook standalone : # Path for links to Boost: boost.root=../../../../.. - + # Some general style settings: table.footnote.number.format=1 footnote.number.format=1 @@ -159,25 +160,25 @@ boostbook standalone generate.section.toc.level=10 ; -lib gmp ; -lib mpfr ; -lib quadmath ; +searched-lib gmp : : shared ; +searched-lib mpfr : : shared ; +searched-lib quadmath : : shared ; # # Some manual tests that are expensive to run: # -run erf_error_plot.cpp mpfr gmp : : : release 17 [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_double ; +run erf_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp : : : release 17 [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_double ; explicit erf_error_plot_double ; -run erf_error_plot.cpp mpfr gmp : : : release 17 TEST_TYPE="\"long double\"" [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_long_double ; +run erf_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp : : : release 17 TEST_TYPE="\"long double\"" [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_long_double ; explicit erf_error_plot_long_double ; -run erf_error_plot.cpp mpfr gmp : : : release 17 TEST_TYPE=cpp_bin_float_50 [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_cpp_bin_float_50 ; +run erf_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp : : : release 17 TEST_TYPE=cpp_bin_float_50 [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_cpp_bin_float_50 ; explicit erf_error_plot_cpp_bin_float_50 ; -run erf_error_plot.cpp mpfr gmp quadmath : : : release 17 gnu TEST_TYPE=float128 [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_float128 ; +run erf_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp quadmath : : : release 17 gnu TEST_TYPE=float128 [ check-target-builds ../../config//has_mpfr : : no ] : erf_error_plot_float128 ; explicit erf_error_plot_cpp_bin_float_50 ; -run erfc_error_plot.cpp mpfr gmp : : : release 17 [ check-target-builds ../../config//has_mpfr : : no ] : erfc_error_plot_double ; +run erfc_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp : : : release 17 [ check-target-builds ../../config//has_mpfr : : no ] : erfc_error_plot_double ; explicit erfc_error_plot_double ; -run erfc_error_plot.cpp mpfr gmp : : : release 17 TEST_TYPE="\"long double\"" [ check-target-builds ../../config//has_mpfr : : no ] : erfc_error_plot_long_double ; +run erfc_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp : : : release 17 TEST_TYPE="\"long double\"" [ check-target-builds ../../config//has_mpfr : : no ] : erfc_error_plot_long_double ; explicit erfc_error_plot_long_double ; -run erfc_error_plot.cpp mpfr gmp : : : release 17 TEST_TYPE=cpp_bin_float_50 [ check-target-builds ../../config//has_mpfr : : no ] : 
erfc_error_plot_cpp_bin_float_50 ;
+run erfc_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp : : : <variant>release <cxxstd>17 <define>TEST_TYPE=cpp_bin_float_50 [ check-target-builds ../../config//has_mpfr : : <build>no ] : erfc_error_plot_cpp_bin_float_50 ;
 explicit erfc_error_plot_cpp_bin_float_50 ;
-run erfc_error_plot.cpp mpfr gmp quadmath : : : <variant>release <cxxstd>17 <cxxstd-dialect>gnu <define>TEST_TYPE=float128 [ check-target-builds ../../config//has_mpfr : : <build>no ] : erfc_error_plot_float128 ;
+run erfc_error_plot.cpp /boost/multiprecision//boost_multiprecision mpfr gmp quadmath : : : <variant>release <cxxstd>17 <cxxstd-dialect>gnu <define>TEST_TYPE=float128 [ check-target-builds ../../config//has_mpfr : : <build>no ] : erfc_error_plot_float128 ;
 explicit erfc_error_plot_cpp_bin_float_50 ;
diff --git a/reporting/performance/Jamfile.v2 b/reporting/performance/Jamfile.v2
index 1f1d6fd639..0a960e9236 100644
--- a/reporting/performance/Jamfile.v2
+++ b/reporting/performance/Jamfile.v2
@@ -12,11 +12,13 @@ import testing ;
 import modules ;
 import path ;
 import pch ;
-import ../../../config/checks/config : requires ;
+import-search /boost/config/checks ;
+import config : requires ;
 
 using quickbook ;
 using auto-index ;
-import ../../../predef/tools/check/predef
+import-search /boost/predef/tools/check ;
+import predef
     : check require
     : predef-check predef-require ;
@@ -39,9 +41,9 @@ if $(is_unix)
 #
 # Configuration first:
 #
-lib gsl ;
-lib gslcblas ;
-lib Rmath ;
+searched-lib gsl ;
+searched-lib gslcblas ;
+searched-lib Rmath ;
 obj has_libstdcxx_tr1 : ../accuracy/has_libstdcxx_tr1.cpp ;
 explicit has_libstdcxx_tr1 ;
 obj has_c99_cmath : has_c99_cmath.cpp ;
@@ -53,16 +55,16 @@ explicit has_rmath ;
 obj is_intel_win : is_intel_win.cpp ;
 explicit is_intel_win ;
 
-CEPHES_SOURCE = acosh.c airy.c asin.c asinh.c atan.c atanh.c bdtr.c beta.c
-btdtr.c cbrt.c chbevl.c chdtr.c clog.c cmplx.c const.c
-cosh.c dawsn.c drand.c ei.c ellie.c ellik.c ellpe.c ellpj.c ellpk.c
-exp.c exp10.c exp2.c expn.c expx2.c fabs.c fac.c fdtr.c
-fresnl.c gamma.c gdtr.c hyp2f1.c hyperg.c i0.c i1.c igami.c incbet.c
-incbi.c igam.c isnan.c iv.c j0.c j1.c jn.c jv.c k0.c k1.c kn.c kolmogorov.c
-log.c log2.c log10.c lrand.c nbdtr.c ndtr.c ndtri.c pdtr.c planck.c
-polevl.c polmisc.c polylog.c polyn.c pow.c powi.c psi.c rgamma.c round.c
-shichi.c sici.c sin.c sindg.c sinh.c spence.c stdtr.c struve.c
-tan.c tandg.c tanh.c unity.c yn.c zeta.c zetac.c
+CEPHES_SOURCE = acosh.c airy.c asin.c asinh.c atan.c atanh.c bdtr.c beta.c
+btdtr.c cbrt.c chbevl.c chdtr.c clog.c cmplx.c const.c
+cosh.c dawsn.c drand.c ei.c ellie.c ellik.c ellpe.c ellpj.c ellpk.c
+exp.c exp10.c exp2.c expn.c expx2.c fabs.c fac.c fdtr.c
+fresnl.c gamma.c gdtr.c hyp2f1.c hyperg.c i0.c i1.c igami.c incbet.c
+incbi.c igam.c isnan.c iv.c j0.c j1.c jn.c jv.c k0.c k1.c kn.c kolmogorov.c
+log.c log2.c log10.c lrand.c nbdtr.c ndtr.c ndtri.c pdtr.c planck.c
+polevl.c polmisc.c polylog.c polyn.c pow.c powi.c psi.c rgamma.c round.c
+shichi.c sici.c sin.c sindg.c sinh.c spence.c stdtr.c struve.c
+tan.c tandg.c tanh.c unity.c yn.c zeta.c zetac.c
 sqrt.c floor.c setprec.c mtherr.c ;
 
 DCDFLIB_SOURCE = dcdflib.c ipmpar.c ;
@@ -78,33 +80,33 @@ explicit $(here)/third_party/cephes_double/acosh.c ;
 explicit $(here)/third_party/dcdflib/dcdflib.c ;
 
 lib cephes_double : $(here)/third_party/cephes_double/$(CEPHES_SOURCE)
-   :
+   :
    <variant>release <link>static
-   [ check-target-builds $(here)/third_party/cephes_double/acosh.c : : <build>no ]
+   [ check-target-builds $(here)/third_party/cephes_double/acosh.c : : <build>no ]
    ;
 explicit cephes_double ;
 
 lib dcdflib : $(here)/third_party/dcdflib/$(DCDFLIB_SOURCE)
-   :
+   :
    <variant>release <link>static
-   [ check-target-builds $(here)/third_party/dcdflib/dcdflib.c : : <build>no ]
+   [ check-target-builds $(here)/third_party/dcdflib/dcdflib.c : : <build>no ]
    ;
 explicit dcdflib ;
-obj table_helper : table_helper.cpp ;
+obj table_helper : table_helper.cpp /boost/filesystem//boost_filesystem /boost/interprocess//boost_interprocess ;
 
 rule all-tests {
    local result ;
    for local source in [ glob test*.cpp ]
    {
-      result += [ run $(source) /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
-        : : : <variant>release <include>../../test
-        [ check-target-builds ../accuracy//has_c99_cmath : <define>TEST_C99 ]
-        [ check-target-builds has_libstdcxx_tr1 : <define>TEST_LIBSTDCXX ]
+      result += [ run $(source) /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem /boost/multiprecision//boost_multiprecision /boost/filesystem//boost_filesystem table_helper
+        : : : <variant>release <include>../../test
+        [ check-target-builds ../accuracy//has_c99_cmath : <define>TEST_C99 ]
+        [ check-target-builds has_libstdcxx_tr1 : <define>TEST_LIBSTDCXX ]
         [ check-target-builds ../accuracy//has_gsl : <define>TEST_GSL <library>gsl <library>gslcblas ]
         [ check-target-builds ../accuracy//has_rmath : <define>TEST_RMATH <library>Rmath ]
        # [ check-target-builds is_intel_win : <build>no : ]
@@ -113,46 +115,46 @@ rule all-tests {
        #<toolset>msvc:<address-model>64
       ] ;
    }
-   return $(result) ;
+   return $(result) ;
 }
-
+
 #
 # Special cases to test different compiler options,
 # cbrt first as an example of a trivial function:
 #
-run test_cbrt.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_cbrt.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>debug <define>COMPILER_COMPARISON_TABLES [ predef-require "BOOST_COMP_MSVC" ] <address-model>32 : test_cbrt_msvc_debug ;
-run test_cbrt.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_cbrt.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES [ predef-require "BOOST_COMP_MSVC" ] <cxxflags>-Ox <address-model>32 : test_cbrt_msvc_release_32 ;
-run test_cbrt.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_cbrt.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES [ predef-require "BOOST_COMP_MSVC" ] <cxxflags>-Ox <address-model>64 : test_cbrt_msvc_release_64 ;
-run test_cbrt.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_cbrt.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES [ check-target-builds is_intel_win : : <build>no ] <toolset>intel:<cxxflags>-Ox <address-model>64 : test_cbrt_intel_release ;
 #
 # Now jn as a little more complex:
 #
-run test_jn.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_jn.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>debug <define>COMPILER_COMPARISON_TABLES <include>../../test [ predef-require "BOOST_COMP_MSVC" ] <address-model>32 : test_jn_msvc_debug ;
-run test_jn.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_jn.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES <include>../../test [ predef-require "BOOST_COMP_MSVC" ] <cxxflags>-Ox <address-model>32 : test_jn_msvc_release_32 ;
-run test_jn.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_jn.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES <include>../../test [ predef-require "BOOST_COMP_MSVC" ] <cxxflags>-Ox <address-model>64 : test_jn_msvc_release_64 ;
-run test_jn.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_jn.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES <include>../../test [ check-target-builds is_intel_win : : <build>no ] <address-model>64 : test_jn_intel_release ;
 #
 # Then something really expensive, like the inverse-incomplete-beta:
 #
-run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>debug <define>COMPILER_COMPARISON_TABLES <include>../../test [ predef-require "BOOST_COMP_MSVC" ] <address-model>32 : test_ibeta_inv_msvc_debug ;
-run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES <include>../../test [ predef-require "BOOST_COMP_MSVC" ] <cxxflags>-Ox <address-model>32 : test_ibeta_inv_msvc_release_32 ;
-run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES <include>../../test [ predef-require "BOOST_COMP_MSVC" ] <cxxflags>-Ox <address-model>64 : test_ibeta_inv_msvc_release_64 ;
-run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system /boost/chrono /boost/filesystem table_helper
+run test_ibeta_inv.cpp /boost/regex//boost_regex /boost/system//boost_system /boost/chrono//boost_chrono /boost/filesystem//boost_filesystem table_helper
    : : : <variant>release <define>COMPILER_COMPARISON_TABLES <include>../../test [ check-target-builds is_intel_win : : <build>no ] <toolset>intel:<cxxflags>-Ox <address-model>64 : test_ibeta_inv_intel_release ;
 
-test-suite report_gen : [ all-tests ] test_cbrt_msvc_debug test_cbrt_msvc_release_32 test_cbrt_msvc_release_64 test_cbrt_intel_release
-   test_jn_msvc_debug test_jn_msvc_release_32 test_jn_msvc_release_64 test_jn_intel_release test_ibeta_inv_msvc_debug
+test-suite report_gen : [ all-tests ] test_cbrt_msvc_debug test_cbrt_msvc_release_32 test_cbrt_msvc_release_64 test_cbrt_intel_release
+   test_jn_msvc_debug test_jn_msvc_release_32 test_jn_msvc_release_64 test_jn_intel_release test_ibeta_inv_msvc_debug
    test_ibeta_inv_msvc_release_32 test_ibeta_inv_msvc_release_64 test_ibeta_inv_intel_release ;
 
 path-constant images_location : html ;
@@ -165,7 +167,7 @@ boostbook standalone
    :
    # Path for links to Boost:
    <xsl:param>boost.root=../../../../..
-   
+
    # Some general style settings:
    <xsl:param>table.footnote.number.format=1
    <xsl:param>footnote.number.format=1
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index e63471c891..95d7849f6b 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -2,4 +2,54 @@
 # Distributed under the Boost Software License, Version 1.0.
 # https://www.boost.org/LICENSE_1_0.txt
 
-add_subdirectory(compile_test)
+include(BoostTestJamfile OPTIONAL RESULT_VARIABLE HAVE_BOOST_TEST)
+
+if(HAVE_BOOST_TEST)
+
+  boost_test(SOURCES check_cmake_version.cpp ARGUMENTS ${PROJECT_VERSION} LINK_LIBRARIES Boost::core Boost::config)
+
+  if (BOOST_MATH_ENABLE_CUDA)
+
+    message(STATUS "Building boost.math with CUDA")
+
+    find_package(CUDA REQUIRED)
+    enable_language(CUDA)
+    set(CMAKE_CUDA_EXTENSIONS OFF)
+
+    enable_testing()
+
+    boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::static_assert Boost::throw_exception Boost::unit_test_framework ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} )
+
+  elseif (BOOST_MATH_ENABLE_NVRTC)
+
+    message(STATUS "Building boost.math with NVRTC")
+
+    find_package(CUDA REQUIRED)
+
+    enable_testing()
+
+    set(CUDA_nvrtc_LIBRARY /usr/local/cuda/lib64/libnvrtc.so)
+
+    if (BOOST_MATH_NVRTC_CI_RUN)
+      boost_test_jamfile(FILE nvrtc_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::static_assert Boost::throw_exception ${CUDA_nvrtc_LIBRARY} ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} COMPILE_DEFINITIONS BOOST_MATH_NVRTC_CI_RUN=1 INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} )
+    else ()
+      boost_test_jamfile(FILE nvrtc_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::static_assert Boost::throw_exception ${CUDA_nvrtc_LIBRARY} ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} )
+    endif()
+
+  elseif (BOOST_MATH_ENABLE_SYCL)
+
+    message(STATUS "Building boost.math with SYCL")
+
+    set(CMAKE_CXX_COMPILER "icpx")
+    set(CMAKE_C_COMPILER "icx")
+
+    enable_testing()
+
+    boost_test_jamfile(FILE sycl_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::static_assert Boost::throw_exception sycl COMPILE_OPTIONS -fsycl )
+
+  else()
+
+    add_subdirectory(compile_test)
+
+  endif()
+
+endif()
diff --git a/test/Jamfile.v2 b/test/Jamfile.v2
index c7eaa3b1eb..4adb29d160 100644
--- a/test/Jamfile.v2
+++ b/test/Jamfile.v2
@@ -12,7 +12,8 @@ import testing ;
 import modules ;
 import path ;
 import pch ;
-import ../../config/checks/config : requires ;
+import-search /boost/config/checks ;
+import config : requires ;
 
 local ntl-path = [ modules.peek : NTL_PATH ] ;
 local gmp_path = [ modules.peek : GMP_PATH ] ;
@@ -29,11 +30,16 @@ if $(remove-test-targets)
    OBJ_REMOVAL_OPTIONS = off ;
 }
 
-obj no_eh : noeh_support.cpp ;
+obj no_eh : noeh_support.cpp : /boost/config//boost_config ;
 
 project : requirements
+   <library>/boost/math//boost_math
+   <library>/boost/multiprecision//boost_multiprecision
+   <library>/boost/test//included
+   <library>/boost/type_index//boost_type_index
+   <library>/boost/ublas//boost_ublas
    $(OBJ_REMOVAL_OPTIONS)
    <toolset>acc:<cxxflags>+W2068,2461,2236,4070,4069
   <toolset>intel-win:<cxxflags>-nologo
@@ -59,7 +65,6 @@ project
    <toolset>borland:<link>static
    # <toolset>msvc:<cxxflags>/wd4506 has no effect?
    # suppress xstring(237) : warning C4506: no definition for inline function
-   <include>../../..
    <exception-handling>off:<source>no_eh
    <link>shared:<define>BOOST_REGEX_DYN_LINK=1
    # For simplicities sake, make everything a static lib:
@@ -84,178 +89,193 @@ if $(ntl-path)
 }
 else
 {
-   lib ntl ;
+   searched-lib ntl ;
 }
 explicit ntl ;
 
-cpp-pch pch : pch.hpp : ../../test/build//boost_unit_test_framework ;
-cpp-pch pch_light : pch_light.hpp : ../../test/build//boost_unit_test_framework ;
+cpp-pch pch : pch.hpp : /boost/test//boost_unit_test_framework ;
+cpp-pch pch_light : pch_light.hpp : /boost/test//boost_unit_test_framework ;
 lib compile_test_main : compile_test/main.cpp ;
 
+searched-lib quadmath ;
+
+local float128_type_intel_quad =
+   [ check-target-builds ../config//has_intel_quad "Intel _Quad datatype support"
+       : <cxxflags>-Qoption,cpp,--extended_float_type <define>BOOST_MATH_USE_FLOAT128 ] ;
+local float128_type_gcc =
+   [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support"
+       : <toolset>gcc:<library>quadmath <toolset>gcc:<define>BOOST_MATH_TEST_FLOAT128 ] ;
+local float128_type_floatmax =
+   [ check-target-builds ../config//has_128bit_floatmax_t "128-bit floatmax_t" : : <build>no ] ;
+local float128_type =
+   $(float128_type_intel_quad) $(float128_type_gcc) $(float128_type_floatmax) ;
+
 test-suite special_fun :
-   [ run test_1F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=1 : test_1F0_1 ] # hypergeometric_pFq_checked_series.hpp uses auto, the rest are from quadrature tests.
-   [ run test_1F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 : test_1F0_2 ] # hypergeometric_pFq_checked_series.hpp uses auto, the rest are from quadrature tests.
-   [ run test_1F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 : test_1F0_3 ] # hypergeometric_pFq_checked_series.hpp uses auto, the rest are from quadrature tests.
-   [ run test_2F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] <define>TEST=1 : test_2F0_1 ]
-   [ run test_2F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] <define>TEST=2 : test_2F0_2 ]
-   [ run test_2F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] <define>TEST=3 : test_2F0_3 ]
-   [ run test_2F0.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] <define>TEST=4 : test_2F0_4 ]
+   [ run test_1F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=1 : test_1F0_1 ] # hypergeometric_pFq_checked_series.hpp uses auto, the rest are from quadrature tests.
+   [ run test_1F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 : test_1F0_2 ] # hypergeometric_pFq_checked_series.hpp uses auto, the rest are from quadrature tests.
+   [ run test_1F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 : test_1F0_3 ] # hypergeometric_pFq_checked_series.hpp uses auto, the rest are from quadrature tests.
+   [ run test_2F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] $(float128_type) <define>TEST=1 : test_2F0_1 ]
+   [ run test_2F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] $(float128_type) <define>TEST=2 : test_2F0_2 ]
+   [ run test_2F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] $(float128_type) <define>TEST=3 : test_2F0_3 ]
+   [ run test_2F0.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] $(float128_type) <define>TEST=4 : test_2F0_4 ]
 
-   [ run test_0F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=1 : test_0F1_1 ]
-   [ run test_0F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 : test_0F1_2 ]
+   [ run test_0F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=1 : test_0F1_1 ]
+   [ run test_0F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 : test_0F1_2 ]
 
-   [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=1 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_integrals ]
-   [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_float ]
-   [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_double ]
-   [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_long_double ]
+   [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=1 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_integrals ]
+   [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_float ]
+   [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_double ]
+   [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_long_double ]
 
-   [ run test_1F1_regularized.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_float ]
-   [ run test_1F1_regularized.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_double ]
-   [ run test_1F1_regularized.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_long_double ]
-   [ run test_1F1_regularized.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=5 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_real_concept ]
+   [ run test_1F1_regularized.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_float ]
+   [ run test_1F1_regularized.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_double ]
+   [ run test_1F1_regularized.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_long_double ]
+   [ run test_1F1_regularized.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=5 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_regularized_real_concept ]
 
    # These are slow...
-   [ run test_1F1_log.cpp ../../test/build//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_float ]
-   [ run test_1F1_log.cpp ../../test/build//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_double ]
-   [ run test_1F1_log.cpp ../../test/build//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_long_double ]
-   [ run test_1F1_log.cpp ../../test/build//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=5 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_real_concept ]
+   [ run test_1F1_log.cpp /boost/test//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_float ]
+   [ run test_1F1_log.cpp /boost/test//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_double ]
+   [ run test_1F1_log.cpp /boost/test//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_long_double ]
+   [ run test_1F1_log.cpp /boost/test//boost_unit_test_framework : : : <variant>release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=5 <toolset>clang:<cxxflags>-Wno-literal-range : test_1F1_log_real_concept ]
 
    # pFq:
-   [ run test_pFq.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_float ]
-   [ run test_pFq.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_double ]
-   [ run test_pFq.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_long_double ]
-   [ run test_pFq.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=5 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_real_concept ]
+   [ run test_pFq.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=2 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_float ]
+   [ run test_pFq.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=3 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_double ]
+   [ run test_pFq.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=4 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_long_double ]
+   [ run test_pFq.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] <define>TEST=5 <variant>release <toolset>clang:<cxxflags>-Wno-literal-range : test_pFq_real_concept ]
 
-   [ run hypot_test.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run pow_test.cpp ../../test/build//boost_unit_test_framework ]
-   [ run logaddexp_test.cpp ../../test/build//boost_unit_test_framework ]
-   [ run logsumexp_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_variadic_templates ] ]
-   [ run ccmath_sqrt_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isinf_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isnan_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_abs_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isfinite_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isnormal_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_fpclassify_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_frexp_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_ldexp_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_div_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_logb_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_ilogb_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_scalbn_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_scalbln_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_floor_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_ceil_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_trunc_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_modf_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_round_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_fmod_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_remainder_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_copysign_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_hypot_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_fdim_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_fmax_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_fmin_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isgreater_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isgreaterequal_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isless_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_islessequal_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_isunordered_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_next_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_fma_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run ccmath_signbit_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : <define>BOOST_MATH_TEST_FLOAT128 <linkflags>"-Bstatic -lquadmath -Bdynamic" ] ]
-   [ run log1p_expm1_test.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run powm1_sqrtp1m1_test.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run git_issue_705.cpp ../../test/build//boost_unit_test_framework ]
-   [ run git_issue_810.cpp ../../test/build//boost_unit_test_framework ]
-   [ run git_issue_826.cpp ../../test/build//boost_unit_test_framework ]
+   [ run hypot_test.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run pow_test.cpp /boost/test//boost_unit_test_framework ]
+   [ run logaddexp_test.cpp /boost/test//boost_unit_test_framework ]
+   [ run logsumexp_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_variadic_templates ] ]
+   [ run ccmath_sqrt_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isinf_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isnan_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_abs_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isfinite_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isnormal_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_fpclassify_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_frexp_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_ldexp_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_div_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_logb_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_ilogb_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_scalbn_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_scalbln_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_floor_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_ceil_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_trunc_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_modf_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_round_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_fmod_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_remainder_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_copysign_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_hypot_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_fdim_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_fmax_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_fmin_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isgreater_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isgreaterequal_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isless_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_islessequal_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_isunordered_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_next_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_fma_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run ccmath_signbit_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] $(float128_type) ]
+   [ run log1p_expm1_test.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run powm1_sqrtp1m1_test.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run git_issue_705.cpp /boost/test//boost_unit_test_framework ]
+   [ run git_issue_810.cpp /boost/test//boost_unit_test_framework ]
+   [ run git_issue_826.cpp /boost/test//boost_unit_test_framework ]
    [ run git_issue_961.cpp ]
    [ run git_issue_1006.cpp ]
    [ run git_issue_184.cpp ]
    [ run git_issue_1137.cpp ]
    [ run git_issue_1139.cpp ]
-   [ run special_functions_test.cpp ../../test/build//boost_unit_test_framework ]
-   [ run test_airy.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_j.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_y.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_i.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_k.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_j_prime.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_y_prime.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_i_prime.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_k_prime.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run bessel_iterator_test.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_beta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_bessel_airy_zeros.cpp ../../test/build//boost_unit_test_framework ]
-   [ run test_bernoulli_constants.cpp ../../test/build//boost_unit_test_framework ]
-   [ run test_binomial_coeff.cpp pch ../../test/build//boost_unit_test_framework ]
-   [ run test_carlson.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run git_issue_1175.cpp ]
+   [ run git_issue_1194.cpp ]
+   [ run special_functions_test.cpp /boost/test//boost_unit_test_framework ]
+   [ run test_airy.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_j.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_y.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_i.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_k.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_j_prime.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_y_prime.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_i_prime.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_k_prime.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run bessel_iterator_test.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_beta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_bessel_airy_zeros.cpp /boost/test//boost_unit_test_framework ]
+   [ run test_bernoulli_constants.cpp /boost/test//boost_unit_test_framework ]
+   [ run test_binomial_coeff.cpp pch /boost/test//boost_unit_test_framework ]
+   [ run test_carlson.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
     : # command line
    : # input files
    : # requirements
    <define>TEST1
    : test_carlson_1 ]
-   [ run test_carlson.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_carlson.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST2
    : test_carlson_2 ]
-   [ run test_carlson.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_carlson.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST3
    : test_carlson_3 ]
-   [ run test_carlson.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_carlson.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST4
    : test_carlson_4 ]
-   [ run test_cbrt.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_difference.cpp ../../test/build//boost_unit_test_framework ]
-   [ run test_digamma.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_ellint_1.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_ellint_2.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_ellint_3.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_ellint_d.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_jacobi_theta.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] ]
-   [ run test_jacobi_zeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_heuman_lambda.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_erf.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
+   [ run test_cbrt.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_difference.cpp /boost/test//boost_unit_test_framework ]
+   [ run test_digamma.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_ellint_1.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_ellint_2.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_ellint_3.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_ellint_d.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_jacobi_theta.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] ]
+   [ run test_jacobi_zeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_heuman_lambda.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_erf.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
    [ run erf_limits_test.cpp ]
-   [ run test_expint.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_factorials.cpp pch ../../test/build//boost_unit_test_framework ]
-   [ run test_gamma.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
+   [ run test_expint.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_factorials.cpp pch /boost/test//boost_unit_test_framework ]
+   [ run test_gamma.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
    [ run test_gamma_edge.cpp ]
-   [ run test_gamma_mp.cpp ../../test/build//boost_unit_test_framework : : : <variant>release <define>TEST=1 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : <build>no ] : test_gamma_mp_1 ]
-   [ run test_gamma_mp.cpp ../../test/build//boost_unit_test_framework : : : <variant>release <define>TEST=2 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : <build>no ] : test_gamma_mp_2 ]
-   [ run test_gamma_mp.cpp ../../test/build//boost_unit_test_framework : : : <variant>release <define>TEST=3 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : <build>no ] : test_gamma_mp_3 ]
-   [ run test_hankel.cpp ../../test/build//boost_unit_test_framework ]
-   [ run test_hermite.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_gamma_mp.cpp /boost/test//boost_unit_test_framework : : : <variant>release <define>TEST=1 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : <build>no ] : test_gamma_mp_1 ]
+   [ run test_gamma_mp.cpp /boost/test//boost_unit_test_framework : : : <variant>release <define>TEST=2 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : <build>no ] : test_gamma_mp_2 ]
+   [ run test_gamma_mp.cpp /boost/test//boost_unit_test_framework : : : <variant>release <define>TEST=3 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : <build>no ] : test_gamma_mp_3 ]
+   [ run test_hankel.cpp /boost/test//boost_unit_test_framework ]
+   [ run test_hermite.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_FLOAT
    <toolset>intel:<pch>off
    : test_ibeta_float ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_DOUBLE
    <toolset>intel:<pch>off
    : test_ibeta_double ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_LDOUBLE
    <toolset>intel:<pch>off
    : test_ibeta_long_double ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -263,7 +283,7 @@ test-suite special_fun :
    <define>TEST_DATA=1
    <toolset>intel:<pch>off
    : test_ibeta_real_concept1 ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -271,7 +291,7 @@ test-suite special_fun :
    <define>TEST_DATA=2
    <toolset>intel:<pch>off
    : test_ibeta_real_concept2 ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -279,7 +299,7 @@ test-suite special_fun :
    <define>TEST_DATA=3
    <toolset>intel:<pch>off
    : test_ibeta_real_concept3 ]
-   [ run test_ibeta.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -288,7 +308,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    : test_ibeta_real_concept4 ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -296,7 +316,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_float ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -304,7 +324,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_double ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -312,7 +332,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_long_double ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -321,7 +341,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_real_concept1 ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -330,7 +350,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_real_concept2 ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -339,7 +359,7 @@ test-suite special_fun :
    <toolset>intel:<pch>off
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_real_concept3 ]
-   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_derivative.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -349,28 +369,28 @@ test-suite special_fun :
    <toolset>gcc:<cxxflags>-Wno-overflow
    : test_ibeta_derivative_real_concept4 ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_FLOAT
    <toolset>intel:<pch>off
    : test_ibeta_inv_float ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_DOUBLE
    <toolset>intel:<pch>off
    : test_ibeta_inv_double ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_LDOUBLE
    <toolset>intel:<pch>off
    : test_ibeta_inv_long_double ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -378,7 +398,7 @@ test-suite special_fun :
    <define>TEST_DATA=1
    <toolset>intel:<pch>off
    : test_ibeta_inv_real_concept1 ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -386,7 +406,7 @@ test-suite special_fun :
    <define>TEST_DATA=2
    <toolset>intel:<pch>off
    : test_ibeta_inv_real_concept2 ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -394,7 +414,7 @@ test-suite special_fun :
    <define>TEST_DATA=3
    <toolset>intel:<pch>off
    : test_ibeta_inv_real_concept3 ]
-   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -402,28 +422,28 @@ test-suite special_fun :
    <define>TEST_DATA=4
    <toolset>intel:<pch>off
    : test_ibeta_inv_real_concept4 ]
-   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_FLOAT
    <toolset>intel:<pch>off
    : test_ibeta_inv_ab_float ]
-   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_DOUBLE
    <toolset>intel:<pch>off
    : test_ibeta_inv_ab_double ]
-   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_LDOUBLE
    <toolset>intel:<pch>off
    : test_ibeta_inv_ab_long_double ]
-   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -431,7 +451,7 @@ test-suite special_fun :
    <define>TEST_DATA=1
    <toolset>intel:<pch>off
    : test_ibeta_inv_ab_real_concept1 ]
-   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -439,7 +459,7 @@ test-suite special_fun :
    <define>TEST_DATA=2
    <toolset>intel:<pch>off
    : test_ibeta_inv_ab_real_concept2 ]
-   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_ibeta_inv_ab.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
@@ -447,57 +467,57 @@ test-suite special_fun :
    <define>TEST_DATA=3
    <toolset>intel:<pch>off
    : test_ibeta_inv_ab_real_concept3 ]
-   [ run test_igamma.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
-   [ run test_igamma_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+   [ run test_igamma_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_FLOAT
    <toolset>intel:<pch>off
    : test_igamma_inv_float ]
-   [ run test_igamma_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_DOUBLE
    <toolset>intel:<pch>off
    : test_igamma_inv_double ]
-   [ run test_igamma_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_LDOUBLE
    <toolset>intel:<pch>off
    : test_igamma_inv_long_double ]
-   [ run test_igamma_inv.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inv.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_REAL_CONCEPT
    <toolset>intel:<pch>off
    : test_igamma_inv_real_concept ]
-   [ run test_igamma_inva.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inva.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_FLOAT
    <toolset>intel:<pch>off
    : test_igamma_inva_float ]
-   [ run test_igamma_inva.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inva.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_DOUBLE
    <toolset>intel:<pch>off
    : test_igamma_inva_double ]
-   [ run test_igamma_inva.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inva.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
    : # command line
    : # input files
    : # requirements
    <define>TEST_LDOUBLE
    <toolset>intel:<pch>off
    : test_igamma_inva_long_double ]
-   [ run test_igamma_inva.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework
+   [ run test_igamma_inva.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework
- [ run test_lambert_w_integrals_float128.cpp ../../test/build//boost_unit_test_framework : : : release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" : no ] [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
- [ run test_lambert_w_integrals_quad.cpp ../../test/build//boost_unit_test_framework : : : release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
- [ run test_lambert_w_integrals_long_double.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] ]
- [ run test_lambert_w_integrals_double.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] ]
- [ run test_lambert_w_integrals_float.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] ]
- [ run test_lambert_w_derivative.cpp ../../test/build//boost_unit_test_framework : : : BOOST_MATH_TEST_MULTIPRECISION [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] ]
+ [ run test_lambert_w.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_lambert_w.cpp /boost/test//boost_unit_test_framework : : : BOOST_MATH_TEST_MULTIPRECISION=1 $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_lambert_w_multiprecision_1 ]
+ [ run test_lambert_w.cpp /boost/test//boost_unit_test_framework : : : BOOST_MATH_TEST_MULTIPRECISION=2 $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_lambert_w_multiprecision_2 ]
+ [ run test_lambert_w.cpp /boost/test//boost_unit_test_framework : : : BOOST_MATH_TEST_MULTIPRECISION=3 $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_lambert_w_multiprecision_3 ]
+ [ run test_lambert_w.cpp /boost/test//boost_unit_test_framework : : : BOOST_MATH_TEST_MULTIPRECISION=4 BOOST_MATH_TEST_FLOAT128 $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_lambert_w_multiprecision_4 ]
+ [ run test_lambert_w_integrals_float128.cpp /boost/test//boost_unit_test_framework : : : release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" : no ] [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
+ [ run test_lambert_w_integrals_quad.cpp /boost/test//boost_unit_test_framework : : : release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
+ [ run test_lambert_w_integrals_long_double.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] ]
+ [ run test_lambert_w_integrals_double.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] ]
+ [ run test_lambert_w_integrals_float.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] ]
+ [ run test_lambert_w_derivative.cpp /boost/test//boost_unit_test_framework : : : BOOST_MATH_TEST_MULTIPRECISION $(float128_type) ]

- [ run test_legendre.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ]
- [ run chebyshev_test.cpp : : : [ requires cxx11_inline_namespaces cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_range_based_for cxx11_constexpr ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ]
+ [ run test_legendre.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework : : : $(float128_type) ]
+ [ run chebyshev_test.cpp : : : [ requires cxx11_inline_namespaces cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_range_based_for cxx11_constexpr ] $(float128_type) ]
 [ run chebyshev_transform_test.cpp ../config//fftw3f : : : TEST1 [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_range_based_for ] [ check-target-builds ../config//has_fftw3 "libfftw3" : : no ] : chebyshev_transform_test_1 ]
 [ run chebyshev_transform_test.cpp ../config//fftw3 : : : TEST2 [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_range_based_for ] [ check-target-builds ../config//has_fftw3 "libfftw3" : : no ] : chebyshev_transform_test_2 ]
 [ run chebyshev_transform_test.cpp ../config//fftw3l : : : TEST3 [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_range_based_for ] [ check-target-builds ../config//has_fftw3 "libfftw3" : : no ] : chebyshev_transform_test_3 ]
@@ -535,52 +555,56 @@ test-suite special_fun :
 [ run cardinal_trigonometric_test.cpp ../config//fftw3q ../config//quadmath : : : TEST4 [ requires cxx11_auto_declarations cxx11_range_based_for ] [ check-target-builds ../config//has_fftw3 "libfftw3" : : no ] [ check-target-builds ../config//has_float128 "__float128" : : no ] : cardinal_trigonometric_test_4 ]
- [ run test_ldouble_simple.cpp ../../test/build//boost_unit_test_framework ]
+ [ run test_ldouble_simple.cpp /boost/test//boost_unit_test_framework ]
 # Needs to run in release mode, as it's rather slow:
- [ run test_next.cpp pch ../../test/build//boost_unit_test_framework : : : release ]
- [ run test_next_decimal.cpp pch ../../test/build//boost_unit_test_framework : : : release ]
- [ run test_owens_t.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_polygamma.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
- [ run test_trigamma.cpp test_instances//test_instances ../../test/build//boost_unit_test_framework ]
- [ run test_round.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run git_issue_430.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_spherical_harmonic.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
- [ run test_sign.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_tgamma_for_issue396_part1.cpp ../../test/build//boost_unit_test_framework : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
- [ run test_tgamma_for_issue396_part2.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
- [ run test_tgamma_ratio.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
- [ run test_trig.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ]
- [ run test_zeta.cpp ../../test/build//boost_unit_test_framework test_instances//test_instances pch_light ]
- [ run test_sinc.cpp ../../test/build//boost_unit_test_framework pch_light ]
- [ run test_fibonacci.cpp ../../test/build//boost_unit_test_framework ]
+ [ run test_next.cpp pch /boost/test//boost_unit_test_framework : : : release ]
+ [ run test_next_decimal.cpp pch /boost/test//boost_unit_test_framework : : : release ]
+ [ run test_owens_t.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_polygamma.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+ [ run test_trigamma.cpp test_instances//test_instances /boost/test//boost_unit_test_framework ]
+ [ run test_round.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run git_issue_430.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_spherical_harmonic.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+ [ run test_sign.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_tgamma_for_issue396_part1.cpp /boost/test//boost_unit_test_framework : : : $(float128_type) gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
+ [ run test_tgamma_for_issue396_part2.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
+ [ run test_tgamma_ratio.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+ [ run test_trig.cpp test_instances//test_instances pch_light /boost/test//boost_unit_test_framework ]
+ [ run test_zeta.cpp /boost/test//boost_unit_test_framework test_instances//test_instances pch_light ]
+ [ run test_sinc.cpp /boost/test//boost_unit_test_framework pch_light ]
+ [ run test_fibonacci.cpp /boost/test//boost_unit_test_framework ]
 ;

 test-suite distribution_tests :
- [ run test_arcsine.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_bernoulli.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_beta_dist.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_arcsine.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_landau.cpp pch : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ]
+ [ run test_saspoint5.cpp pch : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ]
+ [ run test_holtsmark.cpp pch : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ]
+ [ run test_mapairy.cpp pch : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ]
+ [ run test_bernoulli.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_beta_dist.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_FLOAT
   intel:off
 : test_binomial_float
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_DOUBLE
   intel:off
 : test_binomial_double
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_LDOUBLE
   intel:off
 : test_binomial_long_double
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -588,7 +612,7 @@ test-suite distribution_tests :
   TEST_ROUNDING=0
   intel:off
 : test_binomial_real_concept0
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -596,7 +620,7 @@ test-suite distribution_tests :
   TEST_ROUNDING=1
   intel:off
 : test_binomial_real_concept1
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -604,7 +628,7 @@ test-suite distribution_tests :
   TEST_ROUNDING=2
   intel:off
 : test_binomial_real_concept2
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -612,7 +636,7 @@ test-suite distribution_tests :
   TEST_ROUNDING=3
   intel:off
 : test_binomial_real_concept3
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -620,7 +644,7 @@ test-suite distribution_tests :
   TEST_ROUNDING=4
   intel:off
 : test_binomial_real_concept4
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -628,7 +652,7 @@ test-suite distribution_tests :
   TEST_ROUNDING=5
   intel:off
 : test_binomial_real_concept5
 ]
- [ run test_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -636,145 +660,145 @@ test-suite distribution_tests :
   TEST_ROUNDING=6
   intel:off
 : test_binomial_real_concept6
 ]
- [ run test_cauchy.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_chi_squared.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_dist_overloads.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_exponential_dist.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_extreme_value.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_find_location.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_find_scale.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_fisher_f.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_gamma_dist.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_geometric.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_hyperexponential_dist.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_hypergeometric_dist.cpp ../../test/build//boost_unit_test_framework
+ [ run test_cauchy.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_chi_squared.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_dist_overloads.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_exponential_dist.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_extreme_value.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_find_location.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_find_scale.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_fisher_f.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_gamma_dist.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_geometric.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_hyperexponential_dist.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_hypergeometric_dist.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_QUANT=0
   intel:off
 : test_hypergeometric_dist0
 ]
- [ run test_hypergeometric_dist.cpp ../../test/build//boost_unit_test_framework
+ [ run test_hypergeometric_dist.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_QUANT=1
   intel:off
 : test_hypergeometric_dist1
 ]
- [ run test_hypergeometric_dist.cpp ../../test/build//boost_unit_test_framework
+ [ run test_hypergeometric_dist.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_QUANT=2
   intel:off
 : test_hypergeometric_dist2
 ]
- [ run test_hypergeometric_dist.cpp ../../test/build//boost_unit_test_framework
+ [ run test_hypergeometric_dist.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_QUANT=3
   intel:off
 : test_hypergeometric_dist3
 ]
- [ run test_hypergeometric_dist.cpp ../../test/build//boost_unit_test_framework
+ [ run test_hypergeometric_dist.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_QUANT=4
   intel:off
 : test_hypergeometric_dist4
 ]
- [ run test_hypergeometric_dist.cpp ../../test/build//boost_unit_test_framework
+ [ run test_hypergeometric_dist.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_QUANT=5
   intel:off
 : test_hypergeometric_dist5
 ]
- [ run test_inverse_chi_squared_distribution.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_inverse_gamma_distribution.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_inverse_gaussian.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_kolmogorov_smirnov.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] ]
- [ run test_laplace.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_inv_hyp.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_logistic_dist.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_lognormal.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_negative_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_inverse_chi_squared_distribution.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_inverse_gamma_distribution.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_inverse_gaussian.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_kolmogorov_smirnov.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] ]
+ [ run test_laplace.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_inv_hyp.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_logistic_dist.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_lognormal.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_negative_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_FLOAT
   intel:off
 : test_negative_binomial_float
 ]
- [ run test_negative_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_negative_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_DOUBLE
   intel:off
 : test_negative_binomial_double
 ]
- [ run test_negative_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_negative_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_LDOUBLE
   intel:off
 : test_negative_binomial_long_double
 ]
- [ run test_negative_binomial.cpp ../../test/build//boost_unit_test_framework
+ [ run test_negative_binomial.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_REAL_CONCEPT
   intel:off
 : test_negative_binomial_real_concept
 ]
- [ run test_nc_chi_squared.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_chi_squared.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_FLOAT
   intel:off
 : test_nc_chi_squared_float
 ]
- [ run test_nc_chi_squared.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_chi_squared.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_DOUBLE
   intel:off
 : test_nc_chi_squared_double
 ]
- [ run test_nc_chi_squared.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_chi_squared.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_LDOUBLE
   intel:off
 : test_nc_chi_squared_long_double
 ]
- [ run test_nc_chi_squared.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_chi_squared.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_REAL_CONCEPT
   intel:off
 : test_nc_chi_squared_real_concept
 ]
- [ run test_nc_beta.cpp ../../test/build//boost_unit_test_framework
+ [ run test_nc_beta.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_FLOAT
   intel:off
 : test_nc_beta_float
 ]
- [ run test_nc_beta.cpp ../../test/build//boost_unit_test_framework
+ [ run test_nc_beta.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_DOUBLE
   intel:off
 : test_nc_beta_double
 ]
- [ run test_nc_beta.cpp ../../test/build//boost_unit_test_framework
+ [ run test_nc_beta.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_LDOUBLE
   intel:off
 : test_nc_beta_long_double
 ]
- [ run test_nc_beta.cpp ../../test/build//boost_unit_test_framework
+ [ run test_nc_beta.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -782,7 +806,7 @@ test-suite distribution_tests :
   TEST_DATA=1
   intel:off
 : test_nc_beta_real_concept1
 ]
- [ run test_nc_beta.cpp ../../test/build//boost_unit_test_framework
+ [ run test_nc_beta.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
@@ -790,109 +814,109 @@ test-suite distribution_tests :
   TEST_DATA=2
   intel:off
 : test_nc_beta_real_concept2
 ]
- [ run test_nc_f.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_nc_t.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_f.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_nc_t.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_FLOAT
   intel:off
 : test_nc_t_float
 ]
- [ run test_nc_t.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_t.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_DOUBLE
   intel:off
 : test_nc_t_double
 ]
- [ run test_nc_t.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_t.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_LDOUBLE
   intel:off
 : test_nc_t_long_double
 ]
- [ run test_nc_t.cpp pch ../../test/build//boost_unit_test_framework
+ [ run test_nc_t.cpp pch /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_REAL_CONCEPT
   intel:off
 : test_nc_t_real_concept
 ]
- [ run test_normal.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_pareto.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_poisson.cpp ../../test/build//boost_unit_test_framework
+ [ run test_normal.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_pareto.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_poisson.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_FLOAT
   intel:off
 : test_poisson_float
 ]
- [ run test_poisson.cpp ../../test/build//boost_unit_test_framework
+ [ run test_poisson.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_DOUBLE
   intel:off
 : test_poisson_double
 ]
- [ run test_poisson.cpp ../../test/build//boost_unit_test_framework
+ [ run test_poisson.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_LDOUBLE
   intel:off
 : test_poisson_long_double
 ]
- [ run test_poisson.cpp ../../test/build//boost_unit_test_framework
+ [ run test_poisson.cpp /boost/test//boost_unit_test_framework
 : # command line
 : # input files
 : # requirements
   TEST_REAL_CONCEPT
   intel:off
 : test_poisson_real_concept
 ]
- [ run test_rayleigh.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_students_t.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_skew_normal.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_triangular.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_uniform.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_weibull.cpp ../../test/build//boost_unit_test_framework ]
+ [ run test_rayleigh.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_students_t.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_skew_normal.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_triangular.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_uniform.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_weibull.cpp /boost/test//boost_unit_test_framework ]

- [ run test_legacy_nonfinite.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_basic_nonfinite.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_lexical_cast.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_nonfinite_trap.cpp ../../test/build//boost_unit_test_framework : : : off:no ]
- [ run test_signed_zero.cpp ../../test/build//boost_unit_test_framework ]
- [ run complex_test.cpp ../../test/build//boost_unit_test_framework ]
+ [ run test_legacy_nonfinite.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_basic_nonfinite.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_lexical_cast.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_nonfinite_trap.cpp /boost/test//boost_unit_test_framework : : : off:no ]
+ [ run test_signed_zero.cpp /boost/test//boost_unit_test_framework ]
+ [ run complex_test.cpp /boost/test//boost_unit_test_framework ]
 [ compile test_dist_deduction_guides.cpp : [ requires cpp_deduction_guides cpp_variadic_templates ] ]
- [ run git_issue_800.cpp ../../test/build//boost_unit_test_framework ]
- [ run git_issue_845.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_14901.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_14901_ncf.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_15101.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_17146.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_17388.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_17916.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_17916_nct.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_18302.cpp ../../test/build//boost_unit_test_framework ]
- [ run scipy_issue_18511.cpp ../../test/build//boost_unit_test_framework ]
+ [ run git_issue_800.cpp /boost/test//boost_unit_test_framework ]
+ [ run git_issue_845.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_14901.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_14901_ncf.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_15101.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_17146.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_17388.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_17916.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_17916_nct.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_18302.cpp /boost/test//boost_unit_test_framework ]
+ [ run scipy_issue_18511.cpp /boost/test//boost_unit_test_framework ]
 [ compile scipy_issue_19762.cpp ]
 [ run git_issue_1120.cpp ]
 ;

-test-suite new_floats :
+test-suite new_floats :
 [ compile compile_test/float32.cpp ]
 [ compile compile_test/float64.cpp ]
 [ compile compile_test/float128.cpp ]
- [ run test_float_io.cpp : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] ]
- [ run test_float_io.cpp : : : BOOST_MATH_TEST_IO_AS_INTEL_QUAD=1 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] : test_float_io_quad ]
+ [ run test_float_io.cpp : : : $(float128_type) ]
+ [ run test_float_io.cpp : : : BOOST_MATH_TEST_IO_AS_INTEL_QUAD=1 $(float128_type) : test_float_io_quad ]
 ;

 test-suite mp :
- [ run test_nc_t_quad.cpp pch ../../test/build//boost_unit_test_framework : : : release gcc-mingw:-Wa,-mbig-obj off [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run test_polynomial.cpp ../../test/build//boost_unit_test_framework : : : TEST1 : test_polynomial_1 ]
- [ run test_polynomial.cpp ../../test/build//boost_unit_test_framework : : : TEST2 : test_polynomial_2 ]
- [ run test_polynomial.cpp ../../test/build//boost_unit_test_framework : : : TEST3 : test_polynomial_3 ]
+ [ run test_nc_t_quad.cpp pch /boost/test//boost_unit_test_framework : : : release gcc-mingw:-Wa,-mbig-obj off [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
+ [ run test_polynomial.cpp /boost/test//boost_unit_test_framework : : : TEST1 : test_polynomial_1 ]
+ [ run test_polynomial.cpp /boost/test//boost_unit_test_framework : : : TEST2 : test_polynomial_2 ]
+ [ run test_polynomial.cpp /boost/test//boost_unit_test_framework : : : TEST3 : test_polynomial_3 ]
 [ run test_estrin.cpp ]
 [ run polynomial_concept_check.cpp ]
@@ -908,13 +932,13 @@ test-suite misc :
 ../build//boost_math_tr1f
 ../build//boost_math_c99
 ../build//boost_math_c99f
- ../../test/build//boost_unit_test_framework
+ /boost/test//boost_unit_test_framework
 ]
 [ run test_tr1.cpp
 ../build//boost_math_tr1l
 ../build//boost_math_c99l
- ../../test/build//boost_unit_test_framework
+ /boost/test//boost_unit_test_framework
 : : : TEST_LD=1
 [ check-target-builds ../config//has_long_double_support "long double support" : : no ]
@@ -927,7 +951,7 @@ test-suite misc :
 ../build//boost_math_tr1f
 ../build//boost_math_c99
 ../build//boost_math_c99f
- ../../test/build//boost_unit_test_framework
+ /boost/test//boost_unit_test_framework
 : : : #requirements
 : test_tr1_c
 ]
@@ -936,23 +960,23 @@ test-suite misc :
 [ run test_tr1.c
 ../build//boost_math_tr1l
 ../build//boost_math_c99l
- ../../test/build//boost_unit_test_framework
+ /boost/test//boost_unit_test_framework
 : : : TEST_LD=1
 [ check-target-builds ../config//has_long_double_support "long double support" : : no ]
 : test_tr1_c_long_double
 ]
- [ run test_constants.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run simple_continued_fraction_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run centered_continued_fraction_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run luroth_expansion_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run engel_expansion_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run test_classify.cpp pch ../../test/build//boost_unit_test_framework : : : msvc:/bigobj ]
- [ run test_error_handling.cpp ../../test/build//boost_unit_test_framework ]
- [ run legendre_stieltjes_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_range_based_for ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run test_minima.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_rationals.cpp ../../test/build//boost_unit_test_framework
+ [ run test_constants.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
+ [ run simple_continued_fraction_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
+ [ run centered_continued_fraction_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
+ [ run luroth_expansion_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
+ [ run engel_expansion_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
+ [ run test_classify.cpp pch /boost/test//boost_unit_test_framework : : : msvc:/bigobj ]
+ [ run test_error_handling.cpp /boost/test//boost_unit_test_framework ]
+ [ run legendre_stieltjes_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_range_based_for ] $(float128_type) ]
+ [ run test_minima.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_rationals.cpp /boost/test//boost_unit_test_framework
 test_rational_instances/test_rational_double1.cpp
 test_rational_instances/test_rational_double2.cpp
 test_rational_instances/test_rational_double3.cpp
@@ -973,96 +997,96 @@ test-suite misc :
 test_rational_instances/test_rational_real_concept4.cpp
 test_rational_instances/test_rational_real_concept5.cpp
 ]
- [ run test_policy.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_2.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_3.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_4.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_5.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_6.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_7.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_8.cpp ../../test/build//boost_unit_test_framework ]
+ [ run test_policy.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_2.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_3.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_4.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_5.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_6.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_7.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_8.cpp /boost/test//boost_unit_test_framework ]
 [ compile test_policy_9.cpp ]
- [ run test_policy_10.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_policy_sf.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_long_double_support.cpp ../../test/build//boost_unit_test_framework
+ [ run test_policy_10.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_policy_sf.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_long_double_support.cpp /boost/test//boost_unit_test_framework
 : : : [ check-target-builds ../config//has_long_double_support "long double support" : : no ]
 ]
 [ run test_recurrence.cpp : : : TEST=1 [ requires cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_auto_declarations cxx11_decltype ] msvc:/bigobj : test_recurrence_1 ]
- [ run test_recurrence.cpp : : : TEST=2 release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ requires cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_auto_declarations cxx11_decltype ] : test_recurrence_2 ]
- [ run test_recurrence.cpp : : : TEST=3 release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ requires cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_auto_declarations cxx11_decltype ] : test_recurrence_3 ]
+ [ run test_recurrence.cpp : : : TEST=2 release $(float128_type) [ requires cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_auto_declarations cxx11_decltype ] : test_recurrence_2 ]
+ [ run test_recurrence.cpp : : : TEST=3 release $(float128_type) [ requires cxx11_unified_initialization_syntax cxx11_hdr_tuple cxx11_auto_declarations cxx11_decltype ] : test_recurrence_3 ]
 [ run test_print_info_on_type.cpp ]
- [ run univariate_statistics_test.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx17_if_constexpr cxx17_std_apply ] ]
- [ run univariate_statistics_backwards_compatible_test.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
- [ run ooura_fourier_integral_test.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] [ requires cxx17_if_constexpr cxx17_std_apply ] ]
+ [ run univariate_statistics_test.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx17_if_constexpr cxx17_std_apply ] ]
+ [ run univariate_statistics_backwards_compatible_test.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
+ [ run ooura_fourier_integral_test.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] $(float128_type) [ requires cxx17_if_constexpr cxx17_std_apply ] ]
 [ run empirical_cumulative_distribution_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
- [ run norms_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
+ [ run norms_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
 [ run signal_statistics_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
 [ run anderson_darling_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
 [ run ljung_box_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
 [ run cubic_roots_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
 [ run quartic_roots_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
- [ run test_t_test.cpp : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
- [ run test_z_test.cpp : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
+ [ run test_t_test.cpp : : : $(float128_type) [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
+ [ run test_z_test.cpp : : : $(float128_type) [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
 [ run bivariate_statistics_test.cpp : : : [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
 [ run linear_regression_test.cpp : : : [ requires cxx11_hdr_forward_list cxx11_hdr_atomic cxx11_hdr_thread cxx11_hdr_tuple cxx11_hdr_future cxx11_sfinae_expr ] ]
 [ run test_runs_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
- [ run test_chatterjee_correlation.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_rank.cpp ../../test/build//boost_unit_test_framework ]
- [ run lanczos_smoothing_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
- [ run condition_number_test.cpp ../../test/build//boost_unit_test_framework : : : TEST=1 msvc:/bigobj [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] : condition_number_test_1 ]
- [ run condition_number_test.cpp ../../test/build//boost_unit_test_framework : : : TEST=2 msvc:/bigobj [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] : condition_number_test_2 ]
- [ run condition_number_test.cpp ../../test/build//boost_unit_test_framework : : : TEST=3 msvc:/bigobj [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] : condition_number_test_3 ]
- [ run test_real_concept.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_remez.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_roots.cpp pch ../../test/build//boost_unit_test_framework ]
- [ run test_root_iterations.cpp pch ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_tuple ] ]
- [ run test_root_finding_concepts.cpp ../../test/build//boost_unit_test_framework ]
- [ run test_toms748_solve.cpp pch ../../test/build//boost_unit_test_framework ]
+ [ run test_chatterjee_correlation.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_rank.cpp /boost/test//boost_unit_test_framework ]
+ [ run lanczos_smoothing_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
+ [ run condition_number_test.cpp /boost/test//boost_unit_test_framework : : : TEST=1 msvc:/bigobj $(float128_type) : condition_number_test_1 ]
+ [ run condition_number_test.cpp /boost/test//boost_unit_test_framework : : : TEST=2 msvc:/bigobj $(float128_type) : condition_number_test_2 ]
+ [ run condition_number_test.cpp /boost/test//boost_unit_test_framework : : : TEST=3 msvc:/bigobj $(float128_type) : condition_number_test_3 ]
+ [ run test_real_concept.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_remez.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_roots.cpp pch /boost/test//boost_unit_test_framework ]
+ [ run test_root_iterations.cpp pch /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_tuple ] ]
+ [ run test_root_finding_concepts.cpp /boost/test//boost_unit_test_framework ]
+ [ run test_toms748_solve.cpp pch /boost/test//boost_unit_test_framework ]
 [ run compile_test/interpolators_cubic_spline_incl_test.cpp compile_test_main : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
 [ run compile_test/interpolators_barycentric_rational_incl_test.cpp compile_test_main : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
 [ run octonion_test.cpp
- ../../test/build//boost_unit_test_framework ]
+ /boost/test//boost_unit_test_framework ]
 [ run octonion_test_simple.cpp ]
 [ run quaternion_constexpr_test.cpp ]
 [ run quaternion_test.cpp
- ../../test/build//boost_unit_test_framework : : : msvc-14.0:off [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
+ /boost/test//boost_unit_test_framework : : : msvc-14.0:off $(float128_type) ]
 [ run quaternion_mult_incl_test.cpp quaternion_mi1.cpp quaternion_mi2.cpp
- ../../test/build//boost_unit_test_framework ]
+ /boost/test//boost_unit_test_framework ]
 # [ run __temporary_test.cpp test_instances//test_instances : : : always_show_run_output off ]
 ;

 test-suite interpolators :
- [ run test_barycentric_rational.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_unified_initialization_syntax ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run test_vector_barycentric_rational.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_unified_initialization_syntax ] [ check-target-builds ../../multiprecision/config//has_eigen : : no ] ]
- [ run cardinal_cubic_b_spline_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions ] off msvc:/bigobj release ]
- [ run cardinal_b_spline_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run jacobi_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run gegenbauer_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run daubechies_scaling_test.cpp : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
- [ run daubechies_wavelet_test.cpp : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
- [ run fourier_transform_daubechies_test.cpp : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
- [ run wavelet_transform_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ]
- [ run agm_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run rsqrt_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run cohen_acceleration_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
+ [ run test_barycentric_rational.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_unified_initialization_syntax ] $(float128_type) ]
+ [ run test_vector_barycentric_rational.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions cxx11_auto_declarations cxx11_unified_initialization_syntax ] [ check-target-builds ../../multiprecision/config//has_eigen : : no ] ]
+ [ run cardinal_cubic_b_spline_test.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_smart_ptr cxx11_defaulted_functions ] off msvc:/bigobj release ]
+ [ run cardinal_b_spline_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] $(float128_type) ]
+ [ run jacobi_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] $(float128_type) ]
+ [ run gegenbauer_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] $(float128_type) ]
+ [ run daubechies_scaling_test.cpp /boost/hana//boost_hana : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ]
"Cygwin CI run" : no ] ] + [ run daubechies_wavelet_test.cpp /boost/hana//boost_hana : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ] + [ run fourier_transform_daubechies_test.cpp : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] ] + [ run wavelet_transform_test.cpp /boost/hana//boost_hana : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + [ run agm_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ] + [ run rsqrt_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ] + [ run cohen_acceleration_test.cpp : : : msvc:/bigobj [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ] [ compile compile_test/filters_daubechies_incl_test.cpp : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ compile compile_test/sf_daubechies_scaling_incl_test.cpp : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run whittaker_shannon_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] ] [ run cardinal_quadratic_b_spline_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] ] - [ run cardinal_quintic_b_spline_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ] - [ run makima_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ] - [ run pchip_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ] - [ run septic_hermite_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ] - [ run quintic_hermite_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ] - [ run cubic_hermite_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ] + [ run cardinal_quintic_b_spline_test.cpp : : : [ requires cxx11_auto_declarations cxx11_constexpr cxx11_smart_ptr cxx11_defaulted_functions ] $(float128_type) ] + [ run makima_test.cpp /boost/circular_buffer//boost_circular_buffer : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ] + [ run pchip_test.cpp /boost/circular_buffer//boost_circular_buffer : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ] + [ run septic_hermite_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ] + [ run quintic_hermite_test.cpp /boost/circular_buffer//boost_circular_buffer : : : [ requires 
+ [ run cubic_hermite_test.cpp /boost/circular_buffer//boost_circular_buffer : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
 [ run bilinear_uniform_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ]
- [ run bezier_polynomial_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] ]
- [ run catmull_rom_test.cpp ../../test/build//boost_unit_test_framework : : : TEST=1 [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] : catmull_rom_test_1 ]
- [ run catmull_rom_test.cpp ../../test/build//boost_unit_test_framework : : : TEST=2 [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] : catmull_rom_test_2 ]
- [ run catmull_rom_test.cpp ../../test/build//boost_unit_test_framework : : : TEST=3 [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] : catmull_rom_test_3 ]
+ [ run bezier_polynomial_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) ]
+ [ run catmull_rom_test.cpp /boost/test//boost_unit_test_framework : : : TEST=1 [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] : catmull_rom_test_1 ]
+ [ run catmull_rom_test.cpp /boost/test//boost_unit_test_framework : : : TEST=2 [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] : catmull_rom_test_2 ]
+ [ run catmull_rom_test.cpp /boost/test//boost_unit_test_framework : : : TEST=3 [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] : catmull_rom_test_3 ]
 [ run compile_test/interpolators_catmull_rom_incl_test.cpp compile_test_main : : : [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] ]
 [ run compile_test/interpolators_catmull_rom_concept_test.cpp compile_test_main : : : [ requires cxx11_hdr_array cxx11_hdr_initializer_list ] ]
 [ run test_standalone_asserts.cpp ]
@@ -1076,118 +1100,118 @@ test-suite interpolators :
 ;

 test-suite quadrature :
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : msvc:/bigobj TEST1 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : msvc:/bigobj TEST1 $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 : tanh_sinh_quadrature_test_1 ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : msvc:/bigobj TEST1A [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : msvc:/bigobj TEST1A $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 : tanh_sinh_quadrature_test_1a ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : release msvc:/bigobj TEST1B [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : release msvc:/bigobj TEST1B $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 : tanh_sinh_quadrature_test_1b ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : msvc:/bigobj TEST2 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : msvc:/bigobj TEST2 $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 : tanh_sinh_quadrature_test_2 ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : release msvc:/bigobj TEST2A [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : release msvc:/bigobj TEST2A $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 : tanh_sinh_quadrature_test_2a ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : msvc:/bigobj TEST3 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : msvc:/bigobj TEST3 $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ]
 : tanh_sinh_quadrature_test_3 ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : release msvc:/bigobj TEST3A [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : release msvc:/bigobj TEST3A $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ]
 : tanh_sinh_quadrature_test_3a ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : release msvc:/bigobj TEST4 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : release msvc:/bigobj TEST4 $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ]
 : tanh_sinh_quadrature_test_4 ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : release msvc:/bigobj TEST5 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : release msvc:/bigobj TEST5 $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ]
 : tanh_sinh_quadrature_test_5 ]
- [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework
- : : : msvc:/bigobj TEST6 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ]
+ [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework
+ : : : msvc:/bigobj TEST6 $(float128_type)
 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ]
cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : tanh_sinh_quadrature_test_6 ] - [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release msvc:/bigobj TEST6A [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release msvc:/bigobj TEST6A $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : tanh_sinh_quadrature_test_6a ] - [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release msvc:/bigobj TEST7 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release msvc:/bigobj TEST7 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : tanh_sinh_quadrature_test_7 ] - [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release msvc:/bigobj TEST8 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release msvc:/bigobj TEST8 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : tanh_sinh_quadrature_test_8 ] - [ run tanh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework + [ run tanh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework : : : release msvc:/bigobj TEST9 [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax sfinae_expr ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : tanh_sinh_quadrature_test_9 ] [ run tanh_sinh_mpfr.cpp ../tools//mpfr ../tools//gmp : : : [ check-target-builds ../config//has_mpfr : : no ] [ check-target-builds ../config//has_gmp : : no ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] - [ run sinh_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : TEST1 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] : exp_sinh_quadrature_test_1 ] + [ run sinh_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release $(float128_type) [ requires 
cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : TEST1 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] : exp_sinh_quadrature_test_1 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST2 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST2 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] : exp_sinh_quadrature_test_2 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : TEST3 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : TEST3 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_3 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST4 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST4 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_4 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST5 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST5 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_5 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST6 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST6 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_6 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST7 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST7 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_7 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST8 [ 
check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST8 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_8 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST9 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST9 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_9 ] - [ run exp_sinh_quadrature_test.cpp ../../test/build//boost_unit_test_framework - : : : release TEST10 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run exp_sinh_quadrature_test.cpp /boost/test//boost_unit_test_framework + : : : release TEST10 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : exp_sinh_quadrature_test_10 ] - [ run gauss_quadrature_test.cpp : : : TEST1 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_quadrature_test.cpp : : : TEST1 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release : gauss_quadrature_test_1 ] - [ run gauss_quadrature_test.cpp : : : TEST2 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_quadrature_test.cpp : : : TEST2 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : gauss_quadrature_test_2 ] - [ run gauss_quadrature_test.cpp : : : TEST3 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_quadrature_test.cpp : : : TEST3 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : gauss_quadrature_test_3 ] - [ run gauss_kronrod_quadrature_test.cpp : : : TEST1 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_kronrod_quadrature_test.cpp : : : TEST1 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release : gauss_kronrod_quadrature_test_1 ] - [ run gauss_kronrod_quadrature_test.cpp : : : TEST1A [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_kronrod_quadrature_test.cpp : : : TEST1A $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds 
../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : gauss_kronrod_quadrature_test_1a ] - [ run gauss_kronrod_quadrature_test.cpp : : : TEST2 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_kronrod_quadrature_test.cpp : : : TEST2 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : gauss_kronrod_quadrature_test_2 ] - [ run gauss_kronrod_quadrature_test.cpp : : : TEST3 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run gauss_kronrod_quadrature_test.cpp : : : TEST3 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : gauss_kronrod_quadrature_test_3 ] - [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST1 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST1 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release : adaptive_gauss_quadrature_test_1 ] - [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST1A [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST1A $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : adaptive_gauss_quadrature_test_1a ] - [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST2 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST2 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : adaptive_gauss_quadrature_test_2 ] - [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST3 [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] + [ run adaptive_gauss_kronrod_quadrature_test.cpp : : : TEST3 $(float128_type) [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] off msvc:/bigobj release [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : adaptive_gauss_quadrature_test_3 ] [ run naive_monte_carlo_test.cpp : : : @@ -1290,28 +1314,28 @@ test-suite quadrature : [ compile compile_test/gauss_concept_test.cpp : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ compile compile_test/gauss_kronrod_concept_test.cpp : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_smart_ptr cxx11_unified_initialization_syntax ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run git_issue_898.cpp ] - [ run git_issue_1075.cpp : : : [ check-target-builds 
../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ] + [ run git_issue_1075.cpp : : : $(float128_type) ] - [ run test_trapezoidal.cpp ../../test/build//boost_unit_test_framework : : : + [ run test_trapezoidal.cpp /boost/test//boost_unit_test_framework : : : release [ requires cxx11_lambdas cxx11_auto_declarations cxx11_decltype cxx11_unified_initialization_syntax cxx11_variadic_templates ] - [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : "-Bstatic -lquadmath -Bdynamic" ] ] + $(float128_type) ] ; test-suite autodiff : - [ run test_numerical_differentiation.cpp ../../test/build//boost_unit_test_framework : : : msvc:/bigobj [ requires cxx11_auto_declarations cxx11_constexpr ] ] + [ run test_numerical_differentiation.cpp /boost/test//boost_unit_test_framework : : : msvc:/bigobj [ requires cxx11_auto_declarations cxx11_constexpr ] ] [ run compile_test/diff_numerical_differentiation_incl_test.cpp compile_test_main : : : [ requires cxx11_auto_declarations cxx11_constexpr ] ] [ compile compile_test/diff_numerical_differentiation_concept_test.cpp : [ requires cxx11_auto_declarations cxx11_constexpr ] ] - [ run test_autodiff_1.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ run test_autodiff_2.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ run test_autodiff_3.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ run test_autodiff_4.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ run test_autodiff_5.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ run test_autodiff_6.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ run test_autodiff_7.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires 
cxx11_inline_namespaces ] ] - [ run test_autodiff_8.cpp ../../test/build//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ check-target-builds ../config//is_ci_standalone_run "Standalone CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ compile compile_test/diff_autodiff_incl_test.cpp : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ compile compile_test/diff_finite_difference_incl_test.cpp : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] - [ compile compile_test/diff_lanczos_smoothing_incl_test.cpp : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ requires cxx17_if_constexpr cxx17_std_apply ] [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_1.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_2.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_3.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_4.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_5.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_6.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_7.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ run test_autodiff_8.cpp /boost/test//boost_unit_test_framework : : : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ check-target-builds ../config//is_ci_standalone_run "Standalone CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ compile 
compile_test/diff_autodiff_incl_test.cpp : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ compile compile_test/diff_finite_difference_incl_test.cpp : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] + [ compile compile_test/diff_lanczos_smoothing_incl_test.cpp : gcc-mingw:-Wa,-mbig-obj off msvc:/bigobj release [ requires cxx17_if_constexpr cxx17_std_apply ] $(float128_type) [ check-target-builds ../config//is_cygwin_run "Cygwin CI run" : no ] [ requires cxx11_inline_namespaces ] ] ; # @@ -1320,18 +1344,18 @@ test-suite autodiff : # too much time: # test-suite long-running-tests : - [ run test_0F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=3 release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] : test_0F1_3 ] - [ run test_0F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=4 release : test_0F1_4 ] - [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=5 clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_real_concept ] - [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] clang:-Wno-literal-range : test_1F1_quad ] - [ run test_1F1.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_dec_40 ] - [ run test_1F1_regularized.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] clang:-Wno-literal-range : test_1F1_regularized_quad ] - [ run test_1F1_regularized.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_regularized_dec_40 ] - [ run test_1F1_log.cpp ../../test/build//boost_unit_test_framework : : : release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release [ check-target-builds ../config//has_float128 
"GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_log_quad ] - [ run test_1F1_log.cpp ../../test/build//boost_unit_test_framework : : : release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_log_dec_40 ] - [ run test_pFq.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : BOOST_MATH_TEST_FLOAT128 "-Bstatic -lquadmath -Bdynamic" ] clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_pFq_quad ] - [ run test_pFq.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_pFq_dec_40 ] - [ run test_pFq_precision.cpp ../tools//mpfr ../tools//gmp ../../test/build//boost_unit_test_framework /boost/system//boost_system /boost/chrono//boost_chrono : : : [ check-target-builds ../config//has_mpfr : : no ] [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] release clang:-Wno-literal-range ] + [ run test_0F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=3 release $(float128_type) : test_0F1_3 ] + [ run test_0F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=4 release : test_0F1_4 ] + [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=5 clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_real_concept ] + [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] clang:-Wno-literal-range : test_1F1_quad ] + [ run test_1F1.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_dec_40 ] + [ run test_1F1_regularized.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] clang:-Wno-literal-range : test_1F1_regularized_quad ] + [ run test_1F1_regularized.cpp /boost/test//boost_unit_test_framework : : : [ requires 
cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_regularized_dec_40 ] + [ run test_1F1_log.cpp /boost/test//boost_unit_test_framework : : : release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release $(float128_type) clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_log_quad ] + [ run test_1F1_log.cpp /boost/test//boost_unit_test_framework : : : release [ requires cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_1F1_log_dec_40 ] + [ run test_pFq.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=6 release $(float128_type) clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_pFq_quad ] + [ run test_pFq.cpp /boost/test//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] TEST=7 release clang:-Wno-literal-range [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : test_pFq_dec_40 ] + [ run test_pFq_precision.cpp ../tools//mpfr ../tools//gmp /boost/test//boost_unit_test_framework /boost/system//boost_system /boost/chrono//boost_chrono : : : [ check-target-builds ../config//has_mpfr : : no ] [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] release clang:-Wno-literal-range ] [ run test_constant_generate.cpp : : : release USE_CPP_FLOAT=1 off:no ] ; @@ -1349,9 +1373,7 @@ rule get_float128_tests : # command line : # input files : # requirements - [ check-target-builds ../config//has_intel_quad "Intel _Quad datatype support" : -Qoption,cpp,--extended_float_type BOOST_MATH_USE_FLOAT128 ] - [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] - [ check-target-builds ../config//has_128bit_floatmax_t "128-bit floatmax_t" : : no ] + $(float128_type) BOOST_ALL_NO_LIB : $(source:B)_floatmax_t ] ; } @@ -1380,6 +1402,10 @@ test-suite concepts : [ run compile_test/dist_inv_chi_sq_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/dist_hyperexponential_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/dist_hypergeo_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + [ run compile_test/dist_landau_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + [ run compile_test/dist_mapairy_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + [ run compile_test/dist_holtsmark_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + [ run 
compile_test/dist_saspoint5_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/dist_laplace_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/dist_logistic_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/dist_lognormal_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] @@ -1500,7 +1526,7 @@ test-suite concepts : [ compile compile_test/std_real_concept_check.cpp : EMULATE128 [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] : std_real_concept_check_128 ] [ run compile_test/cstdfloat_concept_check_1.cpp : : : [ check-target-builds ../config//has_intel_quad "Intel _Quad datatype support" : -Qoption,cpp,--extended_float_type ] - [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/cstdfloat_concept_check_2.cpp : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/cstdfloat_concept_check_3.cpp : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/cstdfloat_concept_check_4.cpp : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] @@ -1509,7 +1535,7 @@ test-suite concepts : [ compile compile_test/cstdfloat_iostream_incl_test.cpp : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ compile compile_test/cstdfloat_limits_incl_test.cpp : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ compile compile_test/cstdfloat_types_incl_test.cpp : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] - [ run test_cstdfloat.cpp ../../test/build//boost_unit_test_framework : : : [ check-target-builds ../config//has_float128 "GCC libquadmath and __float128 support" : -lquadmath ] [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] + [ run test_cstdfloat.cpp /boost/test//boost_unit_test_framework : : : $(float128_type) [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/sf_airy_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/sf_hankel_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] [ run compile_test/sf_jacobi_incl_test.cpp compile_test_main : : : [ check-target-builds ../config//is_ci_sanitizer_run "Sanitizer CI run" : no ] ] diff --git a/test/beta_med_data.ipp b/test/beta_med_data.ipp index b1f35d98e1..eb3e884b8c 100644 --- a/test/beta_med_data.ipp +++ b/test/beta_med_data.ipp @@ -3,6 +3,10 @@ // Boost Software License, Version 1.0. 
(See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

+#ifdef __CUDACC__
+#pragma nv_diag_suppress 221
+#endif
+
 static const std::array<std::array<typename table_type<T>::type, 3>, 1830> beta_med_data = { {
    {{ SC_(0.4883005917072296142578125), SC_(0.4883005917072296142578125), SC_(3.245912809500479157065104747353807392371) }},
    {{ SC_(3.5808107852935791015625), SC_(0.4883005917072296142578125), SC_(1.007653173802923954909901438393379243537) }},
diff --git a/test/ccmath_abs_test.cpp b/test/ccmath_abs_test.cpp
index 467b7a5a15..6f09b3e0b3 100644
--- a/test/ccmath_abs_test.cpp
+++ b/test/ccmath_abs_test.cpp
@@ -76,9 +76,7 @@ int main()
     // Types that are convertible to int
     test();
-#if CHAR_MIN != 0
-    test();
-#endif
+    test();
 
     // fabs
     fabs_test();
diff --git a/test/ccmath_isinf_test.cpp b/test/ccmath_isinf_test.cpp
index 1f6b61d2fa..3ee5d1375a 100644
--- a/test/ccmath_isinf_test.cpp
+++ b/test/ccmath_isinf_test.cpp
@@ -5,6 +5,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/test/ccmath_sqrt_test.cpp b/test/ccmath_sqrt_test.cpp
index af2911bb85..eb2cf039dc 100644
--- a/test/ccmath_sqrt_test.cpp
+++ b/test/ccmath_sqrt_test.cpp
@@ -4,6 +4,7 @@
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
 #include
+#include
 #include
 #include
 #include
diff --git a/test/check_cmake_version.cpp b/test/check_cmake_version.cpp
new file mode 100644
index 0000000000..2fd4648368
--- /dev/null
+++ b/test/check_cmake_version.cpp
@@ -0,0 +1,27 @@
+// Check whether the version in CMakeLists.txt is up to date
+//
+// Copyright 2018 Peter Dimov
+//
+// Distributed under the Boost Software License, Version 1.0.
+//
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+
+#include <boost/version.hpp>
+#include <boost/core/lightweight_test.hpp>
+#include <cstdio>
+
+int main( int ac, char const* av[] )
+{
+    BOOST_TEST_EQ( ac, 2 );
+
+    if( ac >= 2 )
+    {
+        char version[ 64 ];
+        std::sprintf( version, "%d.%d.%d", BOOST_VERSION / 100000, BOOST_VERSION / 100 % 1000, BOOST_VERSION % 100 );
+
+        BOOST_TEST_CSTR_EQ( av[1], version );
+    }
+
+    return boost::report_errors();
+}
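The std::sprintf call in check_cmake_version.cpp above splits BOOST_VERSION into its digit groups: dividing by 100000 yields the major version, dividing by 100 modulo 1000 the minor version, and modulo 100 the patch level. A minimal standalone sketch of the same arithmetic; the value 108600 is made up purely for illustration (it would correspond to a hypothetical Boost 1.86.0) and is not the real macro from <boost/version.hpp>:

#include <cstdio>

int main()
{
    const int fake_boost_version = 108600;       // illustrative value only
    std::printf("%d.%d.%d\n",
                fake_boost_version / 100000,     // major:  108600 / 100000      = 1
                fake_boost_version / 100 % 1000, // minor: (108600 / 100) % 1000 = 86
                fake_boost_version % 100);       // patch:  108600 % 100         = 0
    return 0;                                    // prints "1.86.0"
}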
diff --git a/test/compile_test/CMakeLists.txt b/test/compile_test/CMakeLists.txt
index d4d3a5dcdd..acfa292228 100644
--- a/test/compile_test/CMakeLists.txt
+++ b/test/compile_test/CMakeLists.txt
@@ -2,7 +2,6 @@
 # Distributed under the Boost Software License, Version 1.0.
 # https://www.boost.org/LICENSE_1_0.txt
 
-file(GLOB SOURCES "*.cpp")
-add_library(boost_math-compile_tests STATIC ${SOURCES})
-target_compile_features(boost_math-compile_tests PRIVATE cxx_std_17)
-target_link_libraries(boost_math-compile_tests PUBLIC Boost::math)
+include_directories(../../include_private)
+file(GLOB SRC_FILES CONFIGURE_DEPENDS "*.cpp")
+boost_test(TYPE "compile" SOURCES ${SRC_FILES} COMPILE_DEFINITIONS BOOST_MATH_STANDALONE COMPILE_FEATURES cxx_std_17 LINK_LIBRARIES Boost::math Boost::multiprecision Boost::numeric_ublas Boost::unit_test_framework )
diff --git a/test/compile_test/dist_holtsmark_incl_test.cpp b/test/compile_test/dist_holtsmark_incl_test.cpp
new file mode 100644
index 0000000000..f90bbb9f07
--- /dev/null
+++ b/test/compile_test/dist_holtsmark_incl_test.cpp
@@ -0,0 +1,26 @@
+// Copyright John Maddock 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Basic sanity check that header <boost/math/distributions/holtsmark.hpp>
+// #includes all the files that it needs to.
+//
+#define BOOST_MATH_ASSERT_UNDEFINED_POLICY false
+#include <boost/math/distributions/holtsmark.hpp>
+//
+// Note this header includes no other headers, this is
+// important if this test is to be meaningful:
+//
+#include "test_compile_result.hpp"
+
+void compile_and_link_test()
+{
+   TEST_DIST_FUNC(holtsmark)
+}
+
+template class boost::math::holtsmark_distribution<float, boost::math::policies::policy<> >;
+template class boost::math::holtsmark_distribution<double, boost::math::policies::policy<> >;
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+template class boost::math::holtsmark_distribution<long double, boost::math::policies::policy<> >;
+#endif
diff --git a/test/compile_test/dist_landau_incl_test.cpp b/test/compile_test/dist_landau_incl_test.cpp
new file mode 100644
index 0000000000..5b63a710ab
--- /dev/null
+++ b/test/compile_test/dist_landau_incl_test.cpp
@@ -0,0 +1,26 @@
+// Copyright John Maddock 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Basic sanity check that header <boost/math/distributions/landau.hpp>
+// #includes all the files that it needs to.
+//
+#define BOOST_MATH_ASSERT_UNDEFINED_POLICY false
+#include <boost/math/distributions/landau.hpp>
+//
+// Note this header includes no other headers, this is
+// important if this test is to be meaningful:
+//
+#include "test_compile_result.hpp"
+
+void compile_and_link_test()
+{
+   TEST_DIST_FUNC(landau)
+}
+
+template class boost::math::landau_distribution<float, boost::math::policies::policy<> >;
+template class boost::math::landau_distribution<double, boost::math::policies::policy<> >;
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+template class boost::math::landau_distribution<long double, boost::math::policies::policy<> >;
+#endif
diff --git a/test/compile_test/dist_mapairy_incl_test.cpp b/test/compile_test/dist_mapairy_incl_test.cpp
new file mode 100644
index 0000000000..7b953711dd
--- /dev/null
+++ b/test/compile_test/dist_mapairy_incl_test.cpp
@@ -0,0 +1,26 @@
+// Copyright John Maddock 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Basic sanity check that header <boost/math/distributions/mapairy.hpp>
+// #includes all the files that it needs to.
+//
+#define BOOST_MATH_ASSERT_UNDEFINED_POLICY false
+#include <boost/math/distributions/mapairy.hpp>
+//
+// Note this header includes no other headers, this is
+// important if this test is to be meaningful:
+//
+#include "test_compile_result.hpp"
+
+void compile_and_link_test()
+{
+   TEST_DIST_FUNC(mapairy)
+}
+
+template class boost::math::mapairy_distribution<float, boost::math::policies::policy<> >;
+template class boost::math::mapairy_distribution<double, boost::math::policies::policy<> >;
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+template class boost::math::mapairy_distribution<long double, boost::math::policies::policy<> >;
+#endif
diff --git a/test/compile_test/dist_saspoint5_incl_test.cpp b/test/compile_test/dist_saspoint5_incl_test.cpp
new file mode 100644
index 0000000000..e48d3691df
--- /dev/null
+++ b/test/compile_test/dist_saspoint5_incl_test.cpp
@@ -0,0 +1,26 @@
+// Copyright John Maddock 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Basic sanity check that header <boost/math/distributions/saspoint5.hpp>
+// #includes all the files that it needs to.
+//
+#define BOOST_MATH_ASSERT_UNDEFINED_POLICY false
+#include <boost/math/distributions/saspoint5.hpp>
+//
+// Note this header includes no other headers, this is
+// important if this test is to be meaningful:
+//
+#include "test_compile_result.hpp"
+
+void compile_and_link_test()
+{
+   TEST_DIST_FUNC(saspoint5)
+}
+
+template class boost::math::saspoint5_distribution<float, boost::math::policies::policy<> >;
+template class boost::math::saspoint5_distribution<double, boost::math::policies::policy<> >;
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+template class boost::math::saspoint5_distribution<long double, boost::math::policies::policy<> >;
+#endif
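Each of the four new *_incl_test.cpp translation units above ends with explicit template instantiations. The point of `template class X<T>;` is that it forces every member of X<T> to be compiled, so a header that forgot one of its own includes fails here even though compile_and_link_test() never calls most members. A minimal sketch of the idiom; toy_distribution is a hypothetical stand-in, not a Boost.Math type:

// Sketch only: toy_distribution is made up for illustration.
#include <cmath>

template <class RealType>
class toy_distribution
{
public:
   explicit toy_distribution(RealType scale) : m_scale(scale) {}
   // If <cmath> were missing above, this member would only fail to compile
   // when instantiated; the explicit instantiations below guarantee that
   // happens in this translation unit even with no callers.
   RealType pdf(RealType x) const { return std::exp(-x / m_scale) / m_scale; }
private:
   RealType m_scale;
};

// Forces compilation of every member for these types:
template class toy_distribution<float>;
template class toy_distribution<double>;

int main() { return 0; }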
diff --git a/test/compile_test/instantiate.hpp b/test/compile_test/instantiate.hpp
index bdb6d515cf..85c85f785d 100644
--- a/test/compile_test/instantiate.hpp
+++ b/test/compile_test/instantiate.hpp
@@ -28,6 +28,7 @@ template bool instantiate_mixed_runner_result::value;
 #include
 #include
+#include
 #include
 
 #if !defined(BOOST_MATH_NO_DISTRIBUTION_CONCEPT_TESTS)
@@ -73,6 +74,33 @@ BOOST_MATH_DECLARE_DISTRIBUTIONS(double, test_policy)
 # define TEST_GROUP_15
 #endif
 
+template <class RealType>
+void instantiate_for_fixed_precision_only(RealType, const std::true_type&)
+{
+   using namespace boost;
+   using namespace boost::math;
+   using namespace boost::math::concepts;
+
+#ifdef TEST_GROUP_1
+#if !defined(BOOST_MATH_NO_DISTRIBUTION_CONCEPT_TESTS)
+   function_requires<DistributionConcept<landau_distribution<RealType> > >();
+   function_requires<DistributionConcept<landau_distribution<RealType, test_policy> > >();
+   function_requires<DistributionConcept<dist_test::landau> >();
+   function_requires<DistributionConcept<mapairy_distribution<RealType> > >();
+   function_requires<DistributionConcept<mapairy_distribution<RealType, test_policy> > >();
+   function_requires<DistributionConcept<dist_test::mapairy> >();
+   function_requires<DistributionConcept<holtsmark_distribution<RealType> > >();
+   function_requires<DistributionConcept<holtsmark_distribution<RealType, test_policy> > >();
+   function_requires<DistributionConcept<dist_test::holtsmark> >();
+   function_requires<DistributionConcept<saspoint5_distribution<RealType> > >();
+   function_requires<DistributionConcept<saspoint5_distribution<RealType, test_policy> > >();
+   function_requires<DistributionConcept<dist_test::saspoint5> >();
+#endif
+#endif
+}
+template <class RealType>
+void instantiate_for_fixed_precision_only(RealType, const std::false_type&){}
+
 template <class RealType>
 void instantiate(RealType)
 {
@@ -118,6 +146,9 @@ void instantiate(RealType)
    function_requires > >();
    function_requires > >();
    function_requires > >();
+
+   instantiate_for_fixed_precision_only(RealType(), std::integral_constant<bool, std::numeric_limits<RealType>::is_specialized && (std::numeric_limits<RealType>::digits <= 113) && (std::numeric_limits<RealType>::radix == 2)>());
+
 #endif // !defined(BOOST_MATH_NO_DISTRIBUTION_CONCEPT_TESTS)
 #endif
 #ifndef BOOST_MATH_INSTANTIATE_MINIMUM
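The pair of instantiate_for_fixed_precision_only overloads above is classic std::integral_constant tag dispatch: the boolean is evaluated at compile time and only the body of the selected overload is instantiated, so the concept checks for the new distributions never touch types that fail the digits/radix test. A compilable sketch of the same idiom under hypothetical names (this is not the Boost.Math test harness itself):

// Minimal tag-dispatch sketch; all names are hypothetical.
#include <cstdio>
#include <limits>
#include <type_traits>

template <class T>
void checks_for_fixed_precision(T, const std::true_type&)
{
   std::printf("running fixed-precision-only checks\n");
}

template <class T>
void checks_for_fixed_precision(T, const std::false_type&) {} // no-op fallback

template <class T>
void run_checks(T value)
{
   // The condition is a compile-time constant, so overload resolution
   // instantiates exactly one of the two bodies for each T.
   checks_for_fixed_precision(value,
      std::integral_constant<bool,
         std::numeric_limits<T>::is_specialized &&
         (std::numeric_limits<T>::digits <= 113) &&
         (std::numeric_limits<T>::radix == 2)>());
}

struct widget {}; // std::numeric_limits is not specialized for this type

int main()
{
   run_checks(1.0);      // double: true_type branch fires
   run_checks(widget{}); // widget: false_type branch, nothing runs
   return 0;
}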
diff --git a/test/compile_test/tools_remez_inc_test.cpp b/test/compile_test/tools_remez_inc_test.cpp
index 938bd76615..98841b687a 100644
--- a/test/compile_test/tools_remez_inc_test.cpp
+++ b/test/compile_test/tools_remez_inc_test.cpp
@@ -8,5 +8,5 @@
 //
 #ifndef BOOST_MATH_STANDALONE
-#include <boost/math/tools/remez.hpp>
+#include "../../include_private/boost/math/tools/remez.hpp"
 #endif
diff --git a/test/compile_test/tools_solve_inc_test.cpp b/test/compile_test/tools_solve_inc_test.cpp
index ee5f79d37a..715cdcc37a 100644
--- a/test/compile_test/tools_solve_inc_test.cpp
+++ b/test/compile_test/tools_solve_inc_test.cpp
@@ -7,5 +7,5 @@
 // #includes all the files that it needs to.
 //
 #ifndef BOOST_MATH_STANDALONE
-#include <boost/math/tools/solve.hpp>
+#include "../../include_private/boost/math/tools/solve.hpp"
 #endif
diff --git a/test/compile_test/tools_test_data_inc_test.cpp b/test/compile_test/tools_test_data_inc_test.cpp
index 6be425d64d..66d4a1ccde 100644
--- a/test/compile_test/tools_test_data_inc_test.cpp
+++ b/test/compile_test/tools_test_data_inc_test.cpp
@@ -7,7 +7,7 @@
 // #includes all the files that it needs to.
 //
 #ifndef BOOST_MATH_STANDALONE
-#include <boost/math/tools/test_data.hpp>
+#include "../../include_private/boost/math/tools/test_data.hpp"
 
 #define T double
diff --git a/test/compile_test/tools_test_inc_test.cpp b/test/compile_test/tools_test_inc_test.cpp
index 10fea03a1d..01b0968d9b 100644
--- a/test/compile_test/tools_test_inc_test.cpp
+++ b/test/compile_test/tools_test_inc_test.cpp
@@ -9,7 +9,7 @@
 #include
 #ifndef BOOST_MATH_STANDALONE
-#include <boost/math/tools/test.hpp>
+#include "../../include_private/boost/math/tools/test.hpp"
 //
 // Note this header includes no other headers, this is
 // important if this test is to be meaningful:
diff --git a/test/cuda_jamfile b/test/cuda_jamfile
new file mode 100644
index 0000000000..02dcea8382
--- /dev/null
+++ b/test/cuda_jamfile
@@ -0,0 +1,389 @@
+# Copyright 2024 Matt Borland
+# Distributed under the Boost Software License, Version 1.0.
+# https://www.boost.org/LICENSE_1_0.txt
+
+import testing ;
+import ../../config/checks/config : requires ;
+
+project : requirements
+  [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
+  ;
+
+# Quad
+run test_exp_sinh_quad_float.cu ;
+run test_exp_sinh_quad_double.cu ;
+run test_sinh_sinh_quad_float.cu ;
+run test_sinh_sinh_quad_double.cu ;
+
+# Distributions
+run test_arcsine_cdf_double.cu ;
+run test_arcsine_cdf_float.cu ;
+run test_arcsine_pdf_double.cu ;
+run test_arcsine_pdf_float.cu ;
+run test_arcsine_quan_double.cu ;
+run test_arcsine_quan_float.cu ;
+run test_arcsine_range_support_double.cu ;
+
+run test_bernoulli_cdf_double.cu ;
+run test_bernoulli_cdf_float.cu ;
+run test_bernoulli_pdf_double.cu ;
+run test_bernoulli_pdf_float.cu ;
+run test_bernoulli_range_support_double.cu ;
+run test_bernoulli_range_support_float.cu ;
+
+run test_beta_dist_cdf_double.cu ;
+run test_beta_dist_cdf_float.cu ;
+run test_beta_dist_pdf_double.cu ;
+run test_beta_dist_pdf_float.cu ;
+run test_beta_dist_quan_double.cu ;
+run test_beta_dist_quan_float.cu ;
+
+run test_cauchy_cdf_double.cu ;
+run test_cauchy_cdf_float.cu ;
+run test_cauchy_pdf_double.cu ;
+run test_cauchy_pdf_float.cu ;
+run test_cauchy_quan_double.cu ;
+run test_cauchy_quan_float.cu ;
+run test_cauchy_range_support_double.cu ;
+run test_cauchy_range_support_float.cu ;
+
+run test_chi_squared_cdf_double.cu ;
+run test_chi_squared_cdf_float.cu ;
+run test_chi_squared_pdf_double.cu ;
+run test_chi_squared_pdf_float.cu ;
+run test_chi_squared_quan_double.cu ;
+run test_chi_squared_quan_float.cu ;
+
+run test_exponential_cdf_double.cu ;
+run test_exponential_cdf_float.cu ;
+run test_exponential_pdf_double.cu ;
+run test_exponential_pdf_float.cu ;
+run test_exponential_quan_double.cu ;
+run test_exponential_quan_float.cu ;
+run test_exponential_range_support_double.cu ;
+run test_exponential_range_support_float.cu ;
+
+run test_extreme_value_cdf_double.cu ;
+run test_extreme_value_cdf_float.cu ;
+run test_extreme_value_pdf_double.cu ;
+run test_extreme_value_pdf_float.cu ;
+run test_extreme_value_quan_double.cu ;
+run test_extreme_value_quan_float.cu ;
+
+run test_fisher_f_cdf_double.cu ;
+run test_fisher_f_cdf_float.cu ;
+run test_fisher_f_pdf_double.cu ;
+run test_fisher_f_pdf_float.cu ;
+run test_fisher_f_quan_double.cu ;
+run test_fisher_f_quan_float.cu ;
+
+run test_gamma_dist_cdf_double.cu ;
+run test_gamma_dist_cdf_float.cu ;
+run test_gamma_dist_pdf_double.cu ;
+run test_gamma_dist_pdf_float.cu ;
+run test_gamma_dist_quan_double.cu ;
+run test_gamma_dist_quan_float.cu ;
+
+run test_geometric_dist_cdf_double.cu ;
+run test_geometric_dist_cdf_float.cu ;
+run test_geometric_dist_pdf_double.cu ;
+run test_geometric_dist_pdf_float.cu ;
+run test_geometric_dist_quan_double.cu ;
+run test_geometric_dist_quan_float.cu ;
+
+run test_holtsmark_cdf_double.cu ;
+run test_holtsmark_cdf_float.cu ;
+run test_holtsmark_pdf_double.cu ;
+run test_holtsmark_pdf_float.cu ;
+
+run test_inverse_chi_squared_cdf_double.cu ;
+run test_inverse_chi_squared_cdf_float.cu ;
+run test_inverse_chi_squared_pdf_double.cu ;
+run test_inverse_chi_squared_pdf_float.cu ;
+run test_inverse_chi_squared_quan_double.cu ;
+run test_inverse_chi_squared_quan_float.cu ;
+
+run test_inverse_gamma_cdf_double.cu ;
+run test_inverse_gamma_cdf_float.cu ;
+run test_inverse_gamma_pdf_double.cu ;
+run test_inverse_gamma_pdf_float.cu ;
+run test_inverse_gamma_quan_double.cu ;
+run test_inverse_gamma_quan_float.cu ;
+
+run test_inverse_gaussian_cdf_double.cu ;
+run test_inverse_gaussian_cdf_float.cu ;
+run test_inverse_gaussian_pdf_double.cu ;
+run test_inverse_gaussian_pdf_float.cu ;
+run test_inverse_gaussian_quan_double.cu ;
+run test_inverse_gaussian_quan_float.cu ;
+
+run test_landau_cdf_double.cu ;
+run test_landau_cdf_float.cu ;
+run test_landau_pdf_double.cu ;
+run test_landau_pdf_float.cu ;
+run test_landau_quan_double.cu ;
+run test_landau_quan_float.cu ;
+
+run test_laplace_cdf_double.cu ;
+run test_laplace_cdf_float.cu ;
+run test_laplace_pdf_double.cu ;
+run test_laplace_pdf_float.cu ;
+run test_laplace_quan_double.cu ;
+run test_laplace_quan_float.cu ;
+
+run test_logistic_cdf_double.cu ;
+run test_logistic_cdf_float.cu ;
+run test_logistic_pdf_double.cu ;
+run test_logistic_pdf_float.cu ;
+run test_logistic_quan_double.cu ;
+run test_logistic_quan_float.cu ;
+
+run test_lognormal_cdf_double.cu ;
+run test_lognormal_cdf_float.cu ;
+run test_lognormal_pdf_double.cu ;
+run test_lognormal_pdf_float.cu ;
+run test_lognormal_quan_double.cu ;
+run test_lognormal_quan_float.cu ;
+
+run test_mapairy_cdf_double.cu ;
+run test_mapairy_cdf_float.cu ;
+run test_mapairy_pdf_double.cu ;
+run test_mapairy_pdf_float.cu ;
+run test_mapairy_quan_double.cu ;
+run test_mapairy_quan_float.cu ;
+
+run test_nc_beta_cdf_double.cu ;
+run test_nc_beta_cdf_float.cu ;
+run test_nc_beta_pdf_double.cu ;
+run test_nc_beta_pdf_float.cu ;
+run test_nc_beta_quan_double.cu ;
+run test_nc_beta_quan_float.cu ;
+
+run test_nc_f_cdf_double.cu ;
+run test_nc_f_cdf_float.cu ;
+run test_nc_f_pdf_double.cu ;
+run test_nc_f_pdf_float.cu ;
+run test_nc_f_quan_double.cu ;
+run test_nc_f_quan_float.cu ;
+
+run test_nc_chi_squared_cdf_double.cu ;
+run test_nc_chi_squared_cdf_float.cu ;
+run test_nc_chi_squared_pdf_double.cu ;
+run test_nc_chi_squared_pdf_float.cu ;
+run test_nc_chi_squared_quan_double.cu ;
+run test_nc_chi_squared_quan_float.cu ;
+
+run test_negative_binomial_cdf_double.cu ;
+run test_negative_binomial_cdf_float.cu ;
+run test_negative_binomial_pdf_double.cu ;
+run test_negative_binomial_pdf_float.cu ;
+run test_negative_binomial_quan_double.cu ;
+run test_negative_binomial_quan_float.cu ;
+
+run test_normal_cdf_double.cu ;
+run test_normal_cdf_float.cu ;
+run test_normal_pdf_double.cu ;
+run test_normal_pdf_float.cu ;
+run test_normal_quan_double.cu ;
+run test_normal_quan_float.cu ;
+
+run test_pareto_cdf_double.cu ;
+run test_pareto_cdf_float.cu ;
+run test_pareto_pdf_double.cu ;
+run test_pareto_pdf_float.cu ;
+run test_pareto_quan_double.cu ;
+run test_pareto_quan_float.cu ;
+
+run test_poisson_cdf_double.cu ;
+run test_poisson_cdf_float.cu ;
+run test_poisson_pdf_double.cu ;
+run test_poisson_pdf_float.cu ;
+run test_poisson_quan_double.cu ;
+run test_poisson_quan_float.cu ;
+
+run test_rayleigh_cdf_double.cu ;
+run test_rayleigh_cdf_float.cu ;
+run test_rayleigh_pdf_double.cu ;
+run test_rayleigh_pdf_float.cu ;
+run test_rayleigh_quan_double.cu ;
+run test_rayleigh_quan_float.cu ;
+
+run test_saspoint5_cdf_double.cu ;
+run test_saspoint5_cdf_float.cu ;
+run test_saspoint5_pdf_double.cu ;
+run test_saspoint5_pdf_float.cu ;
+run test_saspoint5_quan_double.cu ;
+run test_saspoint5_quan_float.cu ;
+
+run test_students_t_cdf_double.cu ;
+run test_students_t_cdf_float.cu ;
+run test_students_t_pdf_double.cu ;
+run test_students_t_pdf_float.cu ;
+run test_students_t_quan_double.cu ;
+run test_students_t_quan_float.cu ;
+
+run test_triangular_cdf_double.cu ;
+run test_triangular_cdf_float.cu ;
+run test_triangular_pdf_double.cu ;
+run test_triangular_pdf_float.cu ;
+run test_triangular_quan_double.cu ;
+run test_triangular_quan_float.cu ;
+
+run test_uniform_cdf_double.cu ;
+run test_uniform_cdf_float.cu ;
+run test_uniform_pdf_double.cu ;
+run test_uniform_pdf_float.cu ;
+run test_uniform_quan_double.cu ;
+run test_uniform_quan_float.cu ;
+
+run test_weibull_cdf_double.cu ;
+run test_weibull_cdf_float.cu ;
+run test_weibull_pdf_double.cu ;
+run test_weibull_pdf_float.cu ;
+run test_weibull_quan_double.cu ;
+run test_weibull_quan_float.cu ;
+
+# Special Functions
+run test_airy_ai_double.cu ;
+run test_airy_ai_float.cu ;
+run test_airy_ai_prime_double.cu ;
+run test_airy_ai_prime_float.cu ;
+run test_airy_bi_double.cu ;
+run test_airy_bi_float.cu ;
+run test_airy_bi_prime_double.cu ;
+run test_airy_bi_prime_float.cu ;
+
+run test_beta_double.cu ;
+run test_beta_float.cu ;
+run test_betac_double.cu ;
+run test_betac_float.cu ;
+run test_ibeta_double.cu ;
+run test_ibeta_float.cu ;
+run test_ibeta_derivative_double.cu ;
+run test_ibeta_derivative_float.cu ;
+run test_ibeta_inv_double.cu ;
+run test_ibeta_inv_float.cu ;
+run test_ibeta_inva_double.cu ;
+run test_ibeta_inva_float.cu ;
+run test_ibeta_invb_double.cu ;
+run test_ibeta_invb_float.cu ;
+run test_ibetac_inv_double.cu ;
+run test_ibetac_inv_float.cu ;
+run test_ibetac_inva_double.cu ;
+run test_ibetac_inva_float.cu ;
+run test_ibetac_invb_double.cu ;
+run test_ibetac_invb_float.cu ;
+
+run test_bessel_i0_double.cu ;
+run test_bessel_i0_float.cu ;
+run test_bessel_i1_double.cu ;
+run test_bessel_i1_float.cu ;
+run test_bessel_j0_double.cu ;
+run test_bessel_j0_float.cu ;
+run test_bessel_j1_double.cu ;
+run test_bessel_j1_float.cu ;
+run test_bessel_k0_double.cu ;
+run test_bessel_k0_float.cu ;
+run test_bessel_k1_double.cu ;
+run test_bessel_k1_float.cu ;
+run test_bessel_kn_double.cu ;
+run test_bessel_kn_float.cu ;
+run test_bessel_y0_double.cu ;
+run test_bessel_y0_float.cu ;
+run test_bessel_y1_double.cu ;
+run test_bessel_y1_float.cu ;
+run test_cyl_bessel_i_double.cu ;
+run test_cyl_bessel_i_float.cu ;
+run test_cyl_bessel_j_double.cu ;
+run test_cyl_bessel_j_float.cu ;
+run test_cyl_bessel_k_double.cu ;
+run test_cyl_bessel_k_float.cu ;
+run test_sph_bessel_double.cu ;
+run test_sph_bessel_float.cu ;
+run test_cyl_neumann_double.cu ;
+run test_cyl_neumann_float.cu ;
+run test_sph_neumann_double.cu ;
+run test_sph_neumann_float.cu ;
+run test_cyl_hankel_1_double.cu ;
+run test_cyl_hankel_1_float.cu ;
+run test_cyl_hankel_2_double.cu ;
+run test_cyl_hankel_2_float.cu ;
+run test_sph_hankel_1_double.cu ;
+run test_sph_hankel_1_float.cu ;
+run test_sph_hankel_2_double.cu ;
+run test_sph_hankel_2_float.cu ;
+
+run test_cbrt_double.cu ;
+run test_cbrt_float.cu ;
+
+run test_changesign_double.cu ;
+run test_changesign_float.cu ;
+
+run test_cos_pi_double.cu ;
+run test_cos_pi_float.cu ;
+
+run test_digamma_double.cu ;
+run test_digamma_float.cu ;
+
+run test_ellint_1_double.cu ;
+run test_ellint_1_float.cu ;
+run test_ellint_2_double.cu ;
+run test_ellint_2_float.cu ;
+run test_ellint_3_double.cu ;
+run test_ellint_3_float.cu ;
+run test_ellint_d_double.cu ;
+run test_ellint_d_float.cu ;
+run test_jacobi_zeta_double.cu ;
+run test_jacobi_zeta_float.cu ;
+run test_heuman_lambda_double.cu ;
+run test_heuman_lambda_float.cu ;
+
+run test_erf_double.cu ;
+run test_erf_float.cu ;
+run test_erf_inv_double.cu ;
+run test_erf_inv_float.cu ;
+run test_erfc_double.cu ;
+run test_erfc_float.cu ;
+run test_erfc_inv_double.cu ;
+run test_erfc_inv_float.cu ;
+
+run test_expint_double.cu ;
+run test_expint_float.cu ;
+
+run test_expm1_double.cu ;
+run test_expm1_float.cu ;
+
+run test_gegenbauer_double.cu ;
+run test_gegenbauer_float.cu ;
+
+run test_hermite_double.cu ;
+run test_hermite_float.cu ;
+
+run test_lgamma_double.cu ;
+run test_lgamma_float.cu ;
+run test_tgamma_double.cu ;
+run test_tgamma_float.cu ;
+run test_tgamma_ratio_double.cu ;
+run test_tgamma_ratio_float.cu ;
+run test_gamma_p_derivative_double.cu ;
+run test_gamma_p_derivative_float.cu ;
+run test_gamma_p_inv_double.cu ;
+run test_gamma_p_inv_float.cu ;
+
+run test_log1p_double.cu ;
+run test_log1p_float.cu ;
+
+run test_modf_double.cu ;
+run test_modf_float.cu ;
+
+run test_round_double.cu ;
+run test_round_float.cu ;
+
+run test_sin_pi_double.cu ;
+run test_sin_pi_float.cu ;
+
+run test_trigamma_double.cu ;
+run test_trigamma_float.cu ;
+
+run test_trunc_double.cu ;
+run test_trunc_float.cu ;
error=" << cudaGetErrorString(err) << std::endl; + } + } + } +}; + +int cudaResetter::count = 0; + +cudaResetter global_resetter; + +template +class cuda_managed_ptr +{ + T* data; + static const cudaResetter resetter; + cuda_managed_ptr(const cuda_managed_ptr&) = delete; + cuda_managed_ptr& operator=(cuda_managed_ptr const&) = delete; + void free() + { + if(data) + { + cudaDeviceSynchronize(); + cudaError_t err = cudaFree(data); + if(err != cudaSuccess) + { + std::cerr << "Failed to deinitialize the device! error=" << cudaGetErrorString(err) << std::endl; + } + } + } +public: + cuda_managed_ptr() : data(0) {} + cuda_managed_ptr(std::size_t n) + { + cudaError_t err = cudaSuccess; + void *ptr; + err = cudaMallocManaged(&ptr, n * sizeof(T)); + if(err != cudaSuccess) + throw std::runtime_error(cudaGetErrorString(err)); + cudaDeviceSynchronize(); + data = static_cast(ptr); + } + cuda_managed_ptr(cuda_managed_ptr&& o) + { + data = o.data; + o.data = 0; + } + cuda_managed_ptr& operator=(cuda_managed_ptr&& o) + { + free(); + data = o.data; + o.data = 0; + return *this; + } + ~cuda_managed_ptr() + { + free(); + } + + class managed_holder : managed_holder_base + { + T* pdata; + public: + managed_holder(T* p) : managed_holder_base(), pdata(p) {} + managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {} + operator T* () { return pdata; } + T& operator[] (std::size_t n) { return pdata[n]; } + }; + class const_managed_holder : managed_holder_base + { + const T* pdata; + public: + const_managed_holder(T* p) : managed_holder_base(), pdata(p) {} + const_managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {} + operator const T* () { return pdata; } + const T& operator[] (std::size_t n) { return pdata[n]; } + }; + + managed_holder get() { return managed_holder(data); } + const_managed_holder get()const { return data; } + T& operator[](std::size_t n) { return data[n]; } + const T& operator[](std::size_t n)const { return data[n]; } +}; + +template +cudaResetter const cuda_managed_ptr::resetter; + +#endif diff --git a/test/daubechies_scaling_test.cpp b/test/daubechies_scaling_test.cpp index 3fd0937ece..e2dda727e4 100644 --- a/test/daubechies_scaling_test.cpp +++ b/test/daubechies_scaling_test.cpp @@ -297,8 +297,8 @@ void test_first_derivative() // Limited precision test data means we can't test long double here... 
diff --git a/test/daubechies_scaling_test.cpp b/test/daubechies_scaling_test.cpp
index 3fd0937ece..e2dda727e4 100644
--- a/test/daubechies_scaling_test.cpp
+++ b/test/daubechies_scaling_test.cpp
@@ -297,8 +297,8 @@ void test_first_derivative()
 // Limited precision test data means we can't test long double here...
 #else
     auto phi1_3 = boost::math::detail::daubechies_scaling_integer_grid<long double, 3, 1>();
-    std::array<long double, 6> lin_3{0.0L, 1.638452340884085725014976L, -2.232758190463137395017742L,
-        0.5501593582740176149905562L, 0.04414649130503405501220997L, 0.0L};
+    std::array<long double, 6> lin_3{0.0L, 1.638452340884085725014976113635604107L, -2.23275819046313739501774225255380757L,
+        0.550159358274017614990556164200803310L, 0.044146491305034055012209974717400368L, 0.0L};
     for (size_t i = 0; i < lin_3.size(); ++i)
     {
         if(!CHECK_ULP_CLOSE(lin_3[i], phi1_3[i], 0))
@@ -308,8 +308,8 @@ void test_first_derivative()
     }
 
     auto phi1_4 = boost::math::detail::daubechies_scaling_integer_grid<long double, 4, 1>();
-    std::array<long double, 8> lin_4 = {0.0L, 1.776072007522184640093776L, -2.785349397229543142492785L, 1.192452536632278174347632L,
-        -0.1313745151846729587935189L, -0.05357102822023923595359996L,0.001770396479992522798495351L, 0.0L};
+    std::array<long double, 8> lin_4 = {0.0L, 1.776072007522184640093776071522502761L, -2.785349397229543142492784905731245880L, 1.192452536632278174347632339082851360L,
+        -0.131374515184672958793518896272545740L, -0.053571028220239235953599959390993709L,0.001770396479992522798495350789431024L, 0.0L};
     for (size_t i = 0; i < lin_4.size(); ++i)
     {
@@ -319,8 +319,8 @@ void test_first_derivative()
     }
 }
 
-    std::array<long double, 10> lin_5 = {0.0L, 1.558326313047001366564379L, -2.436012783189551921436896L, 1.235905129801454293947039L, -0.3674377136938866359947561L,
-        -0.02178035117564654658884556L,0.03234719350814368885815854L,-0.001335619912770701035229331L,-0.00001216838474354431384970525L,0.0L};
+    std::array<long double, 10> lin_5 = {0.0L, 1.558326313047001366564379221011472479L, -2.436012783189551921436895932290077033L, 1.235905129801454293947038906779457610L, -0.367437713693886635994756136622838186L,
+        -0.021780351175646546588845564309594589L,0.032347193508143688858158541500450925L,-0.001335619912770701035229330817898250L,-0.000012168384743544313849705250972915L,0.0L};
     auto phi1_5 = boost::math::detail::daubechies_scaling_integer_grid<long double, 5, 1>();
     for (size_t i = 0; i < lin_5.size(); ++i)
     {
diff --git a/test/float128/log1p_expm1_test.cpp b/test/float128/log1p_expm1_test.cpp
index 7948614403..c46d5b099a 100644
--- a/test/float128/log1p_expm1_test.cpp
+++ b/test/float128/log1p_expm1_test.cpp
@@ -14,7 +14,7 @@
 
 #include "table_type.hpp"
 
-#include "libs/math/test/log1p_expm1_test.hpp"
+#include "log1p_expm1_test.hpp"
 
 //
 // DESCRIPTION:
diff --git a/test/float128/powm1_sqrtp1m1_test.cpp b/test/float128/powm1_sqrtp1m1_test.cpp
index 73972bb7a0..bfc219bcc6 100644
--- a/test/float128/powm1_sqrtp1m1_test.cpp
+++ b/test/float128/powm1_sqrtp1m1_test.cpp
@@ -16,7 +16,7 @@
 
 #include "table_type.hpp"
 
-#include "libs/math/test/powm1_sqrtp1m1_test.hpp"
+#include "powm1_sqrtp1m1_test.hpp"
 
 //
 // DESCRIPTION:
diff --git a/test/float128/table_type.hpp b/test/float128/table_type.hpp
index 6560762db4..7e5c07b248 100644
--- a/test/float128/table_type.hpp
+++ b/test/float128/table_type.hpp
@@ -5,7 +5,7 @@
 
 #ifndef BOOST_MP_TABLE_TYPE
 
-#include
+#include
 
 #define SC_(x) BOOST_FLOATMAX_C(x)
diff --git a/test/float128/test_bessel_i.cpp b/test/float128/test_bessel_i.cpp
index 952cc9d6a6..7e0374cdfc 100644
--- a/test/float128/test_bessel_i.cpp
+++ b/test/float128/test_bessel_i.cpp
@@ -12,7 +12,7 @@
 
 #include "table_type.hpp"
 #include
 
-#include "libs/math/test/test_bessel_i.hpp"
+#include "test_bessel_i.hpp"
 
 void expected_results()
 {
diff --git a/test/float128/test_bessel_j.cpp b/test/float128/test_bessel_j.cpp
index 7afeeebeb4..f3bab11fd7 100644
--- a/test/float128/test_bessel_j.cpp
+++ b/test/float128/test_bessel_j.cpp
@@ -12,7 +12,7 @@
 
 #include
"table_type.hpp" #include -#include "libs/math/test/test_bessel_j.hpp" +#include "test_bessel_j.hpp" void expected_results() { diff --git a/test/float128/test_bessel_k.cpp b/test/float128/test_bessel_k.cpp index a5ec1e2b4e..7f7144649f 100644 --- a/test/float128/test_bessel_k.cpp +++ b/test/float128/test_bessel_k.cpp @@ -12,7 +12,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_bessel_k.hpp" +#include "test_bessel_k.hpp" void expected_results() { diff --git a/test/float128/test_bessel_y.cpp b/test/float128/test_bessel_y.cpp index 55bdf56e71..240d7785a1 100644 --- a/test/float128/test_bessel_y.cpp +++ b/test/float128/test_bessel_y.cpp @@ -12,7 +12,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_bessel_y.hpp" +#include "test_bessel_y.hpp" void expected_results() { diff --git a/test/float128/test_beta.cpp b/test/float128/test_beta.cpp index 6cfddd566a..ecdf347964 100644 --- a/test/float128/test_beta.cpp +++ b/test/float128/test_beta.cpp @@ -12,7 +12,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_beta.hpp" +#include "test_beta.hpp" void expected_results() { diff --git a/test/float128/test_binomial_coeff.cpp b/test/float128/test_binomial_coeff.cpp index be208f4f55..392150b06f 100644 --- a/test/float128/test_binomial_coeff.cpp +++ b/test/float128/test_binomial_coeff.cpp @@ -12,7 +12,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_binomial_coeff.hpp" +#include "test_binomial_coeff.hpp" void expected_results() { diff --git a/test/float128/test_carlson.cpp b/test/float128/test_carlson.cpp index 1458493732..0954816c36 100644 --- a/test/float128/test_carlson.cpp +++ b/test/float128/test_carlson.cpp @@ -11,7 +11,7 @@ #include #include #include -#include "libs/math/test/test_carlson.hpp" +#include "test_carlson.hpp" void expected_results() { diff --git a/test/float128/test_cbrt.cpp b/test/float128/test_cbrt.cpp index d6690bdd98..7b53b8a434 100644 --- a/test/float128/test_cbrt.cpp +++ b/test/float128/test_cbrt.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_cbrt.hpp" +#include "test_cbrt.hpp" void expected_results() { diff --git a/test/float128/test_digamma.cpp b/test/float128/test_digamma.cpp index 9856223bef..5702ccf73b 100644 --- a/test/float128/test_digamma.cpp +++ b/test/float128/test_digamma.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_digamma.hpp" +#include "test_digamma.hpp" void expected_results() { diff --git a/test/float128/test_ellint_1.cpp b/test/float128/test_ellint_1.cpp index 5c259e9405..90f9e1bed5 100644 --- a/test/float128/test_ellint_1.cpp +++ b/test/float128/test_ellint_1.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_ellint_1.hpp" +#include "test_ellint_1.hpp" void expected_results() { diff --git a/test/float128/test_ellint_2.cpp b/test/float128/test_ellint_2.cpp index 8b05124670..3f43e8c1dc 100644 --- a/test/float128/test_ellint_2.cpp +++ b/test/float128/test_ellint_2.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_ellint_2.hpp" +#include "test_ellint_2.hpp" void expected_results() { diff --git a/test/float128/test_ellint_3.cpp b/test/float128/test_ellint_3.cpp index e462683888..644dd1cbb8 100644 --- a/test/float128/test_ellint_3.cpp +++ b/test/float128/test_ellint_3.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_ellint_3.hpp" +#include "test_ellint_3.hpp" void expected_results() { diff --git 
a/test/float128/test_erf.cpp b/test/float128/test_erf.cpp index 90efd139e7..5f3bcaa46a 100644 --- a/test/float128/test_erf.cpp +++ b/test/float128/test_erf.cpp @@ -8,7 +8,7 @@ #define TEST_UDT #include -#include "libs/math/test/test_erf.hpp" +#include "test_erf.hpp" void expected_results() { diff --git a/test/float128/test_expint.cpp b/test/float128/test_expint.cpp index 73a1f6bb5f..ff9ece822d 100644 --- a/test/float128/test_expint.cpp +++ b/test/float128/test_expint.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_expint.hpp" +#include "test_expint.hpp" void expected_results() { diff --git a/test/float128/test_gamma.cpp b/test/float128/test_gamma.cpp index ddaae2adf0..bcec76083e 100644 --- a/test/float128/test_gamma.cpp +++ b/test/float128/test_gamma.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_gamma.hpp" +#include "test_gamma.hpp" void expected_results() { diff --git a/test/float128/test_hermite.cpp b/test/float128/test_hermite.cpp index f933c6d27f..3b6fcdfb5d 100644 --- a/test/float128/test_hermite.cpp +++ b/test/float128/test_hermite.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_hermite.hpp" +#include "test_hermite.hpp" void expected_results() { diff --git a/test/float128/test_ibeta.cpp b/test/float128/test_ibeta.cpp index 708a6950de..c46da77665 100644 --- a/test/float128/test_ibeta.cpp +++ b/test/float128/test_ibeta.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_ibeta.hpp" +#include "test_ibeta.hpp" void expected_results() { diff --git a/test/float128/test_ibeta_inv_1.cpp b/test/float128/test_ibeta_inv_1.cpp index 68049024f3..2fc059740c 100644 --- a/test/float128/test_ibeta_inv_1.cpp +++ b/test/float128/test_ibeta_inv_1.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_ibeta_inv.hpp" +#include "test_ibeta_inv.hpp" void expected_results() { diff --git a/test/float128/test_ibeta_inv_ab_4.cpp b/test/float128/test_ibeta_inv_ab_4.cpp index 3e0bc85816..d02a99f26f 100644 --- a/test/float128/test_ibeta_inv_ab_4.cpp +++ b/test/float128/test_ibeta_inv_ab_4.cpp @@ -11,7 +11,7 @@ #define FULL_TEST #include -#include "libs/math/test/test_ibeta_inv_ab.hpp" +#include "test_ibeta_inv_ab.hpp" void expected_results() { diff --git a/test/float128/test_igamma.cpp b/test/float128/test_igamma.cpp index d533254841..7a987c643e 100644 --- a/test/float128/test_igamma.cpp +++ b/test/float128/test_igamma.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_igamma.hpp" +#include "test_igamma.hpp" void expected_results() { diff --git a/test/float128/test_igamma_inv.cpp b/test/float128/test_igamma_inv.cpp index 122db9d42e..bc9b4289e2 100644 --- a/test/float128/test_igamma_inv.cpp +++ b/test/float128/test_igamma_inv.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_igamma_inv.hpp" +#include "test_igamma_inv.hpp" void expected_results() { diff --git a/test/float128/test_igamma_inva.cpp b/test/float128/test_igamma_inva.cpp index 0a244c2f45..19daa7cfa7 100644 --- a/test/float128/test_igamma_inva.cpp +++ b/test/float128/test_igamma_inva.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_igamma_inva.hpp" +#include "test_igamma_inva.hpp" void expected_results() { diff --git a/test/float128/test_laguerre.cpp b/test/float128/test_laguerre.cpp index 04ae016b94..dfa475448c 100644 --- a/test/float128/test_laguerre.cpp +++ 
b/test/float128/test_laguerre.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_laguerre.hpp" +#include "test_laguerre.hpp" void expected_results() { diff --git a/test/float128/test_legendre.cpp b/test/float128/test_legendre.cpp index 463c2d90a7..e6de9644a7 100644 --- a/test/float128/test_legendre.cpp +++ b/test/float128/test_legendre.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_legendre.hpp" +#include "test_legendre.hpp" void expected_results() { diff --git a/test/float128/test_polygamma.cpp b/test/float128/test_polygamma.cpp index e62cef9af6..1ff41e34f1 100644 --- a/test/float128/test_polygamma.cpp +++ b/test/float128/test_polygamma.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_polygamma.hpp" +#include "test_polygamma.hpp" void expected_results() { diff --git a/test/float128/test_tgamma_ratio.cpp b/test/float128/test_tgamma_ratio.cpp index 9d2568f558..0138ce472f 100644 --- a/test/float128/test_tgamma_ratio.cpp +++ b/test/float128/test_tgamma_ratio.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_tgamma_ratio.hpp" +#include "test_tgamma_ratio.hpp" void expected_results() { diff --git a/test/float128/test_trigamma.cpp b/test/float128/test_trigamma.cpp index 8862b53d00..3ab2f9473b 100644 --- a/test/float128/test_trigamma.cpp +++ b/test/float128/test_trigamma.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_trigamma.hpp" +#include "test_trigamma.hpp" void expected_results() { diff --git a/test/float128/test_zeta.cpp b/test/float128/test_zeta.cpp index 4dcdf19cc1..67aebfbff5 100644 --- a/test/float128/test_zeta.cpp +++ b/test/float128/test_zeta.cpp @@ -7,7 +7,7 @@ #include "table_type.hpp" #include -#include "libs/math/test/test_zeta.hpp" +#include "test_zeta.hpp" void expected_results() { diff --git a/test/git_issue_1175.cpp b/test/git_issue_1175.cpp new file mode 100644 index 0000000000..9770acf537 --- /dev/null +++ b/test/git_issue_1175.cpp @@ -0,0 +1,25 @@ +// (C) Copyright Matt Borland 2023. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "math_unit_test.hpp" +#include +#include + +using namespace std; +using boost::math::beta_distribution; + +int main(int argc, char* argv[]) +{ + double a = 5.0; + double b = 5.0; + double p = 0.5; + + beta_distribution<> dist(a, b); + double x = quantile(dist, p); + + CHECK_ULP_CLOSE(x, 0.5, 2); + + return boost::math::test::report_errors(); +} diff --git a/test/git_issue_1194.cpp b/test/git_issue_1194.cpp new file mode 100644 index 0000000000..1c364a0c4d --- /dev/null +++ b/test/git_issue_1194.cpp @@ -0,0 +1,41 @@ +// (C) Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "math_unit_test.hpp" +#include +#include + +int main() +{ + using c99_error_policy = ::boost::math::policies::policy< + ::boost::math::policies::domain_error< ::boost::math::policies::errno_on_error>, + ::boost::math::policies::pole_error< ::boost::math::policies::errno_on_error>, + ::boost::math::policies::overflow_error< ::boost::math::policies::errno_on_error>, + ::boost::math::policies::evaluation_error< ::boost::math::policies::errno_on_error>, + ::boost::math::policies::rounding_error< ::boost::math::policies::errno_on_error> >; + + double val = -std::numeric_limits::infinity(); + + val = boost::math::tgamma(val, c99_error_policy()); + CHECK_EQUAL(errno, EDOM); + + val = std::numeric_limits::quiet_NaN(); + val = boost::math::tgamma(val, c99_error_policy()); + CHECK_EQUAL(errno, EDOM); + + val = std::numeric_limits::infinity(); + val = boost::math::tgamma(val, c99_error_policy()); + CHECK_EQUAL(errno, ERANGE); + + val = 0; + val = boost::math::tgamma(val, c99_error_policy()); + CHECK_EQUAL(errno, EDOM); // OK + + val = -2; + val = boost::math::tgamma(val, c99_error_policy()); + CHECK_EQUAL(errno, EDOM); // OK + + return boost::math::test::report_errors(); +} diff --git a/test/handle_test_result.hpp b/test/handle_test_result.hpp index e909d64583..66bfe557b1 100644 --- a/test/handle_test_result.hpp +++ b/test/handle_test_result.hpp @@ -6,8 +6,8 @@ #ifndef BOOST_MATH_HANDLE_TEST_RESULT #define BOOST_MATH_HANDLE_TEST_RESULT +#include "../include_private/boost/math/tools/test.hpp" #include -#include #include #include #include diff --git a/test/nvrtc_jamfile b/test/nvrtc_jamfile new file mode 100644 index 0000000000..e049a24d2b --- /dev/null +++ b/test/nvrtc_jamfile @@ -0,0 +1,388 @@ +# Copyright 2024 Matt Borland +# Distributed under the Boost Software License, Version 1.0. 
+# https://www.boost.org/LICENSE_1_0.txt + +import testing ; +import ../../config/checks/config : requires ; + +project : requirements + [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ] + ; + +# Quad +run test_exp_sinh_quad_nvrtc_float.cpp ; +run test_exp_sinh_quad_nvrtc_double.cpp ; +run test_sinh_sinh_quad_nvrtc_float.cpp ; +run test_sinh_sinh_quad_nvrtc_double.cpp ; + +# Distributions +run test_arcsine_cdf_nvrtc_double.cpp ; +run test_arcsine_cdf_nvrtc_float.cpp ; +run test_arcsine_pdf_nvrtc_double.cpp ; +run test_arcsine_pdf_nvrtc_float.cpp ; +run test_arcsine_quantile_nvrtc_double.cpp ; +run test_arcsine_quantile_nvrtc_float.cpp ; + +run test_bernoulli_cdf_nvrtc_double.cpp ; +run test_bernoulli_cdf_nvrtc_float.cpp ; +run test_bernoulli_pdf_nvrtc_double.cpp ; +run test_bernoulli_pdf_nvrtc_float.cpp ; +run test_bernoulli_quan_nvrtc_double.cpp ; +run test_bernoulli_quan_nvrtc_float.cpp ; + +run test_beta_dist_cdf_nvrtc_double.cpp ; +run test_beta_dist_cdf_nvrtc_float.cpp ; +run test_beta_dist_pdf_nvrtc_double.cpp ; +run test_beta_dist_pdf_nvrtc_float.cpp ; +run test_beta_dist_quan_nvrtc_double.cpp ; +run test_beta_dist_quan_nvrtc_float.cpp ; + +run test_cauchy_cdf_nvrtc_double.cpp ; +run test_cauchy_cdf_nvrtc_float.cpp ; +run test_cauchy_pdf_nvrtc_double.cpp ; +run test_cauchy_pdf_nvrtc_float.cpp ; +run test_cauchy_quan_nvrtc_double.cpp ; +run test_cauchy_quan_nvrtc_float.cpp ; + +run test_chi_squared_cdf_nvrtc_double.cpp ; +run test_chi_squared_cdf_nvrtc_float.cpp ; +run test_chi_squared_pdf_nvrtc_double.cpp ; +run test_chi_squared_pdf_nvrtc_float.cpp ; +run test_chi_squared_quan_nvrtc_double.cpp ; +run test_chi_squared_quan_nvrtc_float.cpp ; + +run test_exponential_cdf_nvrtc_double.cpp ; +run test_exponential_cdf_nvrtc_float.cpp ; +run test_exponential_pdf_nvrtc_double.cpp ; +run test_exponential_pdf_nvrtc_float.cpp ; +run test_exponential_quan_nvrtc_double.cpp ; +run test_exponential_quan_nvrtc_float.cpp ; + +run test_extreme_value_cdf_nvrtc_double.cpp ; +run test_extreme_value_cdf_nvrtc_float.cpp ; +run test_extreme_value_pdf_nvrtc_double.cpp ; +run test_extreme_value_pdf_nvrtc_float.cpp ; +run test_extreme_value_quan_nvrtc_double.cpp ; +run test_extreme_value_quan_nvrtc_float.cpp ; + +run test_fisher_f_cdf_nvrtc_double.cpp ; +run test_fisher_f_cdf_nvrtc_float.cpp ; +run test_fisher_f_pdf_nvrtc_double.cpp ; +run test_fisher_f_pdf_nvrtc_float.cpp ; +run test_fisher_f_quan_nvrtc_double.cpp ; +run test_fisher_f_quan_nvrtc_float.cpp ; + +run test_gamma_dist_cdf_nvrtc_double.cpp ; +run test_gamma_dist_cdf_nvrtc_float.cpp ; +run test_gamma_dist_pdf_nvrtc_double.cpp ; +run test_gamma_dist_pdf_nvrtc_float.cpp ; +run test_gamma_dist_quan_nvrtc_double.cpp ; +run test_gamma_dist_quan_nvrtc_float.cpp ; + +run test_geometric_dist_cdf_nvrtc_double.cpp ; +run test_geometric_dist_cdf_nvrtc_float.cpp ; +run test_geometric_dist_pdf_nvrtc_double.cpp ; +run test_geometric_dist_pdf_nvrtc_float.cpp ; +run test_geometric_dist_quan_nvrtc_double.cpp ; +run test_geometric_dist_quan_nvrtc_float.cpp ; + +run test_holtsmark_cdf_nvrtc_double.cpp ; +run test_holtsmark_cdf_nvrtc_float.cpp ; +run test_holtsmark_pdf_nvrtc_double.cpp ; +run test_holtsmark_pdf_nvrtc_float.cpp ; +run test_holtsmark_quan_nvrtc_double.cpp ; +run test_holtsmark_quan_nvrtc_float.cpp ; + +run test_inverse_chi_squared_cdf_nvrtc_double.cpp ; +run test_inverse_chi_squared_cdf_nvrtc_float.cpp ; +run test_inverse_chi_squared_pdf_nvrtc_double.cpp ; +run 
test_inverse_chi_squared_pdf_nvrtc_float.cpp ; +run test_inverse_chi_squared_quan_nvrtc_double.cpp ; +run test_inverse_chi_squared_quan_nvrtc_float.cpp ; + +run test_inverse_gamma_cdf_nvrtc_double.cpp ; +run test_inverse_gamma_cdf_nvrtc_float.cpp ; +run test_inverse_gamma_pdf_nvrtc_double.cpp ; +run test_inverse_gamma_pdf_nvrtc_float.cpp ; +run test_inverse_gamma_quan_nvrtc_double.cpp ; +run test_inverse_gamma_quan_nvrtc_float.cpp ; + +run test_inverse_gaussian_cdf_nvrtc_double.cpp ; +run test_inverse_gaussian_cdf_nvrtc_float.cpp ; +run test_inverse_gaussian_pdf_nvrtc_double.cpp ; +run test_inverse_gaussian_pdf_nvrtc_float.cpp ; +run test_inverse_gaussian_quan_nvrtc_double.cpp ; +run test_inverse_gaussian_quan_nvrtc_float.cpp ; + +run test_landau_cdf_nvrtc_double.cpp ; +run test_landau_cdf_nvrtc_float.cpp ; +run test_landau_pdf_nvrtc_double.cpp ; +run test_landau_pdf_nvrtc_float.cpp ; +run test_landau_quan_nvrtc_double.cpp ; +run test_landau_quan_nvrtc_float.cpp ; + +run test_laplace_cdf_nvrtc_double.cpp ; +run test_laplace_cdf_nvrtc_float.cpp ; +run test_laplace_pdf_nvrtc_double.cpp ; +run test_laplace_pdf_nvrtc_float.cpp ; +run test_laplace_quan_nvrtc_double.cpp ; +run test_laplace_quan_nvrtc_float.cpp ; + +run test_logistic_cdf_nvrtc_double.cpp ; +run test_logistic_cdf_nvrtc_float.cpp ; +run test_logistic_pdf_nvrtc_double.cpp ; +run test_logistic_pdf_nvrtc_float.cpp ; +run test_logistic_quan_nvrtc_double.cpp ; +run test_logistic_quan_nvrtc_float.cpp ; + +run test_lognormal_cdf_nvrtc_double.cpp ; +run test_lognormal_cdf_nvrtc_float.cpp ; +run test_lognormal_pdf_nvrtc_double.cpp ; +run test_lognormal_pdf_nvrtc_float.cpp ; +run test_lognormal_quan_nvrtc_double.cpp ; +run test_lognormal_quan_nvrtc_float.cpp ; + +run test_mapairy_cdf_nvrtc_double.cpp ; +run test_mapairy_cdf_nvrtc_float.cpp ; +run test_mapairy_pdf_nvrtc_double.cpp ; +run test_mapairy_pdf_nvrtc_float.cpp ; +run test_mapairy_quan_nvrtc_double.cpp ; +run test_mapairy_quan_nvrtc_float.cpp ; + +run test_nc_beta_cdf_nvrtc_double.cpp ; +run test_nc_beta_cdf_nvrtc_float.cpp ; +run test_nc_beta_pdf_nvrtc_double.cpp ; +run test_nc_beta_pdf_nvrtc_float.cpp ; +run test_nc_beta_quan_nvrtc_double.cpp ; +run test_nc_beta_quan_nvrtc_float.cpp ; + +run test_nc_chi_squared_cdf_nvrtc_double.cpp ; +run test_nc_chi_squared_cdf_nvrtc_float.cpp ; +run test_nc_chi_squared_pdf_nvrtc_double.cpp ; +run test_nc_chi_squared_pdf_nvrtc_float.cpp ; +run test_nc_chi_squared_quan_nvrtc_double.cpp ; +run test_nc_chi_squared_quan_nvrtc_float.cpp ; + +run test_nc_f_cdf_nvrtc_double.cpp ; +run test_nc_f_cdf_nvrtc_float.cpp ; +run test_nc_f_pdf_nvrtc_double.cpp ; +run test_nc_f_pdf_nvrtc_float.cpp ; +run test_nc_f_quan_nvrtc_double.cpp ; +run test_nc_f_quan_nvrtc_float.cpp ; + +run test_negative_binomial_cdf_nvrtc_double.cpp ; +run test_negative_binomial_cdf_nvrtc_float.cpp ; +run test_negative_binomial_pdf_nvrtc_double.cpp ; +run test_negative_binomial_pdf_nvrtc_float.cpp ; +run test_negative_binomial_quan_nvrtc_double.cpp ; +run test_negative_binomial_quan_nvrtc_float.cpp ; + +run test_normal_cdf_nvrtc_double.cpp ; +run test_normal_cdf_nvrtc_float.cpp ; +run test_normal_pdf_nvrtc_double.cpp ; +run test_normal_pdf_nvrtc_float.cpp ; +run test_normal_quan_nvrtc_double.cpp ; +run test_normal_quan_nvrtc_float.cpp ; + +run test_pareto_cdf_nvrtc_double.cpp ; +run test_pareto_cdf_nvrtc_float.cpp ; +run test_pareto_pdf_nvrtc_double.cpp ; +run test_pareto_pdf_nvrtc_float.cpp ; +run test_pareto_quan_nvrtc_double.cpp ; +run test_pareto_quan_nvrtc_float.cpp ; + +run 
test_poisson_cdf_nvrtc_double.cpp ; +run test_poisson_cdf_nvrtc_float.cpp ; +run test_poisson_pdf_nvrtc_double.cpp ; +run test_poisson_pdf_nvrtc_float.cpp ; +run test_poisson_quan_nvrtc_double.cpp ; +run test_poisson_quan_nvrtc_float.cpp ; + +run test_rayleigh_cdf_nvrtc_double.cpp ; +run test_rayleigh_cdf_nvrtc_float.cpp ; +run test_rayleigh_pdf_nvrtc_double.cpp ; +run test_rayleigh_pdf_nvrtc_float.cpp ; +run test_rayleigh_quan_nvrtc_double.cpp ; +run test_rayleigh_quan_nvrtc_float.cpp ; + +run test_saspoint5_cdf_nvrtc_double.cpp ; +run test_saspoint5_cdf_nvrtc_float.cpp ; +run test_saspoint5_pdf_nvrtc_double.cpp ; +run test_saspoint5_pdf_nvrtc_float.cpp ; +run test_saspoint5_quan_nvrtc_double.cpp ; +run test_saspoint5_quan_nvrtc_float.cpp ; + +run test_students_t_cdf_nvrtc_double.cpp ; +run test_students_t_cdf_nvrtc_float.cpp ; +run test_students_t_pdf_nvrtc_double.cpp ; +run test_students_t_pdf_nvrtc_float.cpp ; +run test_students_t_quan_nvrtc_double.cpp ; +run test_students_t_quan_nvrtc_float.cpp ; + +run test_triangular_cdf_nvrtc_double.cpp ; +run test_triangular_cdf_nvrtc_float.cpp ; +run test_triangular_pdf_nvrtc_double.cpp ; +run test_triangular_pdf_nvrtc_float.cpp ; +run test_triangular_quan_nvrtc_double.cpp ; +run test_triangular_quan_nvrtc_float.cpp ; + +run test_uniform_cdf_nvrtc_double.cpp ; +run test_uniform_cdf_nvrtc_float.cpp ; +run test_uniform_pdf_nvrtc_double.cpp ; +run test_uniform_pdf_nvrtc_float.cpp ; +run test_uniform_quan_nvrtc_double.cpp ; +run test_uniform_quan_nvrtc_float.cpp ; + +run test_weibull_cdf_nvrtc_double.cpp ; +run test_weibull_cdf_nvrtc_float.cpp ; +run test_weibull_pdf_nvrtc_double.cpp ; +run test_weibull_pdf_nvrtc_float.cpp ; +run test_weibull_quan_nvrtc_double.cpp ; +run test_weibull_quan_nvrtc_float.cpp ; + +# Special Functions +run test_airy_ai_nvrtc_double.cpp ; +run test_airy_ai_nvrtc_float.cpp ; +run test_airy_ai_prime_nvrtc_double.cpp ; +run test_airy_ai_prime_nvrtc_float.cpp ; +run test_airy_bi_nvrtc_double.cpp ; +run test_airy_bi_nvrtc_float.cpp ; +run test_airy_bi_prime_nvrtc_double.cpp ; +run test_airy_bi_prime_nvrtc_float.cpp ; + +run test_beta_nvrtc_double.cpp ; +run test_beta_nvrtc_float.cpp ; +run test_betac_nvrtc_double.cpp ; +run test_betac_nvrtc_float.cpp ; +run test_ibeta_nvrtc_double.cpp ; +run test_ibeta_nvrtc_float.cpp ; +run test_ibetac_nvrtc_double.cpp ; +run test_ibetac_nvrtc_float.cpp ; +run test_ibeta_derivative_nvrtc_double.cpp ; +run test_ibeta_derivative_nvrtc_float.cpp ; +run test_ibeta_inv_nvrtc_double.cpp ; +run test_ibeta_inv_nvrtc_float.cpp ; +run test_ibeta_inva_nvrtc_double.cpp ; +run test_ibeta_inva_nvrtc_float.cpp ; +run test_ibeta_invb_nvrtc_double.cpp ; +run test_ibeta_invb_nvrtc_float.cpp ; +run test_ibetac_inv_nvrtc_double.cpp ; +run test_ibetac_inv_nvrtc_float.cpp ; +run test_ibetac_inva_nvrtc_double.cpp ; +run test_ibetac_inva_nvrtc_float.cpp ; +run test_ibetac_invb_nvrtc_double.cpp ; +run test_ibetac_invb_nvrtc_float.cpp ; + +run test_bessel_i0_nvrtc_double.cpp ; +run test_bessel_i0_nvrtc_float.cpp ; +run test_bessel_i1_nvrtc_double.cpp ; +run test_bessel_i1_nvrtc_float.cpp ; +run test_bessel_j0_nvrtc_double.cpp ; +run test_bessel_j0_nvrtc_float.cpp ; +run test_bessel_j1_nvrtc_double.cpp ; +run test_bessel_j1_nvrtc_float.cpp ; +run test_bessel_k0_nvrtc_double.cpp ; +run test_bessel_k0_nvrtc_float.cpp ; +run test_bessel_k1_nvrtc_double.cpp ; +run test_bessel_k1_nvrtc_float.cpp ; +run test_bessel_kn_nvrtc_double.cpp ; +run test_bessel_kn_nvrtc_float.cpp ; +run test_bessel_y0_nvrtc_double.cpp ; +run 
test_bessel_y0_nvrtc_float.cpp ; +run test_bessel_y1_nvrtc_double.cpp ; +run test_bessel_y1_nvrtc_float.cpp ; +run test_cyl_bessel_i_nvrtc_double.cpp ; +run test_cyl_bessel_i_nvrtc_float.cpp ; +run test_cyl_bessel_j_nvrtc_double.cpp ; +run test_cyl_bessel_j_nvrtc_float.cpp ; +run test_cyl_bessel_k_nvrtc_double.cpp ; +run test_cyl_bessel_k_nvrtc_float.cpp ; +run test_sph_bessel_nvrtc_double.cpp ; +run test_sph_bessel_nvrtc_float.cpp ; +run test_cyl_neumann_nvrtc_double.cpp ; +run test_cyl_neumann_nvrtc_float.cpp ; +run test_sph_neumann_nvrtc_double.cpp ; +run test_sph_neumann_nvrtc_float.cpp ; +run test_cyl_hankel_1_nvrtc_double.cpp ; +run test_cyl_hankel_1_nvrtc_float.cpp ; +run test_cyl_hankel_2_nvrtc_double.cpp ; +run test_cyl_hankel_2_nvrtc_float.cpp ; +run test_sph_hankel_1_nvrtc_double.cpp ; +run test_sph_hankel_1_nvrtc_float.cpp ; +run test_sph_hankel_2_nvrtc_double.cpp ; +run test_sph_hankel_2_nvrtc_float.cpp ; + +run test_cbrt_nvrtc_double.cpp ; +run test_cbrt_nvrtc_float.cpp ; + +run test_cos_pi_nvrtc_double.cpp ; +run test_cos_pi_nvrtc_float.cpp ; + +run test_digamma_nvrtc_double.cpp ; +run test_digamma_nvrtc_float.cpp ; + +run test_erf_nvrtc_double.cpp ; +run test_erf_nvrtc_float.cpp ; +run test_erfc_nvrtc_double.cpp ; +run test_erfc_nvrtc_float.cpp ; +run test_erf_inv_nvrtc_double.cpp ; +run test_erf_inv_nvrtc_float.cpp ; +run test_erfc_inv_nvrtc_double.cpp ; +run test_erfc_inv_nvrtc_float.cpp ; + +run test_ellint_1_nvrtc_double.cpp ; +run test_ellint_1_nvrtc_float.cpp ; +run test_ellint_2_nvrtc_double.cpp ; +run test_ellint_2_nvrtc_float.cpp ; +run test_ellint_3_nvrtc_double.cpp ; +run test_ellint_3_nvrtc_float.cpp ; +run test_ellint_d_nvrtc_double.cpp ; +run test_ellint_d_nvrtc_float.cpp ; +run test_jacobi_zeta_nvrtc_double.cpp ; +run test_jacobi_zeta_nvrtc_float.cpp ; +run test_heumann_lambda_nvrtc_double.cpp ; +run test_heumann_lambda_nvrtc_float.cpp ; + +run test_expint_nvrtc_double.cpp ; +run test_expint_nvrtc_float.cpp ; + +run test_expm1_nvrtc_double.cpp ; +run test_expm1_nvrtc_float.cpp ; + +run test_fpclassify_nvrtc_double.cpp ; +run test_fpclassify_nvrtc_float.cpp ; + +run test_gamma_nvrtc_double.cpp ; +run test_gamma_nvrtc_float.cpp ; +run test_gamma_p_derivative_nvrtc_double.cpp ; +run test_gamma_p_derivative_nvrtc_float.cpp ; +run test_gamma_p_inv_nvrtc_double.cpp ; +run test_gamma_p_inv_nvrtc_float.cpp ; +run test_tgamma_ratio_nvrtc_double.cpp ; +run test_tgamma_ratio_nvrtc_float.cpp ; + +run test_gegenbauer_nvrtc_double.cpp ; +run test_gegenbauer_nvrtc_float.cpp ; + +run test_hermite_nvrtc_double.cpp ; +run test_hermite_nvrtc_float.cpp ; + +run test_log1p_nvrtc_double.cpp ; +run test_log1p_nvrtc_float.cpp ; + +run test_modf_nvrtc_double.cpp ; +run test_modf_nvrtc_float.cpp ; + +run test_round_nvrtc_double.cpp ; +run test_round_nvrtc_float.cpp ; + +run test_sign_nvrtc_double.cpp ; +run test_sign_nvrtc_float.cpp ; + +run test_sin_pi_nvrtc_double.cpp ; +run test_sin_pi_nvrtc_float.cpp ; + +run test_trigamma_nvrtc_double.cpp ; +run test_trigamma_nvrtc_float.cpp ; + +run test_trunc_nvrtc_double.cpp ; diff --git a/test/pow_test.cpp b/test/pow_test.cpp index ce3d036ab1..24a1cd7f79 100644 --- a/test/pow_test.cpp +++ b/test/pow_test.cpp @@ -2,6 +2,7 @@ // Tests the pow function // (C) Copyright Bruno Lalande 2008. +// (C) Copyright Matt Borland 2024. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE_1_0.txt or copy at
 // http://www.boost.org/LICENSE_1_0.txt)
 
@@ -11,13 +12,12 @@
 #include
 #include
-#include
+#include "../include_private/boost/math/tools/test.hpp"
 #define BOOST_TEST_MAIN
 #include
 #include
 #include
-#include
 #include
 #include
@@ -37,7 +37,9 @@ void test_pow(T base)
 {
     if ((base == 0) && N < 0)
     {
+        #ifndef BOOST_MATH_NO_EXCEPTIONS
         BOOST_MATH_CHECK_THROW(math::pow<N>(base), std::overflow_error);
+        #endif
     }
     else
     {
@@ -100,15 +102,15 @@ void test_with_big_exponents()
 void test_return_types()
 {
-    static_assert((is_same<decltype(math::pow<2>('\1')), double>::value), "Return type mismatch");
-    static_assert((is_same<decltype(math::pow<2>(L'\2')), double>::value), "Return type mismatch");
-    static_assert((is_same<decltype(math::pow<2>(3)), double>::value), "Return type mismatch");
-    static_assert((is_same<decltype(math::pow<2>(4u)), double>::value), "Return type mismatch");
-    static_assert((is_same<decltype(math::pow<2>(5ul)), double>::value), "Return type mismatch");
-    static_assert((is_same<decltype(math::pow<2>(6.0f)), float>::value), "Return type mismatch");
-    static_assert((is_same<decltype(math::pow<2>(7.0)), double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>('\1')), double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(L'\2')), double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(3)), double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(4u)), double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(5ul)), double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(6.0f)), float>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(7.0)), double>::value), "Return type mismatch");
 #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
-    static_assert((is_same<decltype(math::pow<2>(7.0l)), long double>::value), "Return type mismatch");
+    static_assert((boost::math::is_same<decltype(math::pow<2>(7.0l)), long double>::value), "Return type mismatch");
 #endif
 }
diff --git a/test/stopwatch.hpp b/test/stopwatch.hpp
new file mode 100644
index 0000000000..9f3c60de80
--- /dev/null
+++ b/test/stopwatch.hpp
@@ -0,0 +1,39 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_MATH_CUDA_STOPWATCH_HPP
+#define BOOST_MATH_CUDA_STOPWATCH_HPP
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+#include <chrono>
+
+template <class Clock>
+struct stopwatch
+{
+   typedef typename Clock::duration duration;
+   stopwatch()
+   {
+      m_start = Clock::now();
+   }
+   double elapsed()
+   {
+      duration t = Clock::now() - m_start;
+      return std::chrono::duration_cast<std::chrono::duration<double>>(t).count();
+   }
+   void reset()
+   {
+      m_start = Clock::now();
+   }
+
+private:
+   typename Clock::time_point m_start;
+};
+
+typedef stopwatch<std::chrono::high_resolution_clock> watch;
+
+#endif
diff --git a/test/sycl_jamfile b/test/sycl_jamfile
new file mode 100644
index 0000000000..582eaea407
--- /dev/null
+++ b/test/sycl_jamfile
@@ -0,0 +1,95 @@
+# Copyright 2024 Matt Borland
+# Distributed under the Boost Software License, Version 1.0.
+# https://www.boost.org/LICENSE_1_0.txt + +import testing ; +import ../../config/checks/config : requires ; + +project : requirements + [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ] + ; + +# Distributions +run test_arcsine.cpp ; +run test_bernoulli.cpp ; +run test_beta_dist.cpp ; +run test_cauchy.cpp ; +run test_chi_squared.cpp ; +run test_exponential_dist.cpp ; +run test_extreme_value.cpp ; +run test_fisher_f.cpp ; +run test_gamma_dist.cpp ; +run test_geometric.cpp ; +run test_holtsmark.cpp ; +run test_inverse_chi_squared_distribution.cpp ; +run test_inverse_gamma_distribution.cpp ; +run test_inverse_gaussian.cpp ; +run test_landau.cpp ; +run test_laplace.cpp ; +run test_logistic_dist.cpp ; +run test_lognormal.cpp ; +run test_mapairy.cpp ; +run test_nc_beta.cpp ; +run test_nc_chi_squared.cpp ; +run test_nc_f.cpp ; +run test_negative_binomial.cpp ; +run test_normal.cpp ; +run test_pareto.cpp ; +run test_poisson.cpp ; +run test_rayleigh.cpp ; +run test_saspoint5.cpp ; +run test_students_t.cpp ; +run test_triangular.cpp ; +run test_uniform.cpp ; +run test_weibull.cpp ; + +# Special Functions +run pow_test.cpp ; + +run test_airy.cpp ; + +run test_beta_simple.cpp ; +run test_beta.cpp ; +run test_ibeta.cpp ; +run test_ibeta_inv.cpp ; +run test_ibeta_inv_ab.cpp ; + +run test_bessel_i.cpp ; +run test_bessel_j.cpp ; +run test_bessel_k.cpp ; +run test_bessel_y.cpp ; + +run test_cbrt.cpp ; + +run test_ellint_1.cpp ; +run test_ellint_2.cpp ; +run test_ellint_d.cpp ; +run test_jacobi_zeta.cpp ; +run test_heuman_lambda.cpp ; + +run test_sign.cpp ; + +run test_round.cpp ; + +run test_expint.cpp ; + +run test_expm1_simple.cpp ; + +run gegenbauer_test.cpp ; + +run test_hankel.cpp ; + +run test_log1p_simple.cpp ; + +run test_digamma_simple.cpp ; + +run test_trigamma.cpp ; + +run test_erf.cpp ; + +run test_gamma.cpp ; +run test_igamma.cpp ; +run test_igamma_inv.cpp ; +run test_igamma_inva.cpp ; + +run test_hermite.cpp ; diff --git a/test/test_airy.cpp b/test/test_airy.cpp index d42fbb4ca3..335c5fd92c 100644 --- a/test/test_airy.cpp +++ b/test/test_airy.cpp @@ -3,14 +3,21 @@ // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#endif #define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error #define BOOST_TEST_MAIN #include #include #include +#include + +#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS #include +#endif + #include #include #include @@ -48,8 +55,13 @@ void test_airy(T, const char* name) }}; T tol = boost::math::tools::epsilon() * 800; - if ((std::numeric_limits::digits > 100) || (std::numeric_limits::digits == 0)) + if (boost::math::tools::digits() > 100) tol *= 2; + + #ifdef SYCL_LANGUAGE_VERSION + tol *= 5; + #endif + for(unsigned i = 0; i < data.size(); ++i) { BOOST_CHECK_CLOSE_FRACTION(data[i][1], boost::math::airy_ai(data[i][0]), tol); diff --git a/test/test_airy_ai_double.cu b/test/test_airy_ai_double.cu new file mode 100644 index 0000000000..fad46bd9d5 --- /dev/null +++ b/test/test_airy_ai_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/airy.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::airy_ai(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::airy_ai(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
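Two invariants in the launch code above are worth making explicit: blocksPerGrid is the ceiling of numElements / threadsPerBlock, so the grid always covers every element, and the `if (i < numElements)` guard in the kernel discards the up to threadsPerBlock - 1 surplus threads of the last block. A small host-only check of that arithmetic (the helper name blocks_for is invented for illustration):

#include <cassert>

// Ceiling division, as used to size the CUDA grid.
constexpr int blocks_for(int n, int block)
{
    return (n + block - 1) / block;
}

int main()
{
    assert(blocks_for(50000, 256) == 196); // 196 * 256 = 50176 >= 50000
    assert(blocks_for(50176, 256) == 196); // exact multiple: no extra block
    assert(blocks_for(50177, 256) == 197); // one element past a multiple
    // Threads launched minus elements = surplus handled by the i < n guard:
    static_assert(blocks_for(50000, 256) * 256 - 50000 == 176, "surplus threads");
    return 0;
}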
diff --git a/test/test_airy_ai_float.cu b/test/test_airy_ai_float.cu
new file mode 100644
index 0000000000..b9149aec39
--- /dev/null
+++ b/test/test_airy_ai_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/airy.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::airy_ai(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::airy_ai(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_airy_ai_nvrtc_double.cpp b/test/test_airy_ai_nvrtc_double.cpp
new file mode 100644
index 0000000000..1b918cfef2
--- /dev/null
+++ b/test/test_airy_ai_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_airy_ai_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_ai(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_ai_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_ai_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_ai_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_ai(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_ai_nvrtc_float.cpp b/test/test_airy_ai_nvrtc_float.cpp new file mode 100644 index 0000000000..6957306426 --- /dev/null +++ b/test/test_airy_ai_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_airy_ai_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_ai(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_ai_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_ai_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_ai_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_ai(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_ai_prime_double.cu b/test/test_airy_ai_prime_double.cu new file mode 100644 index 0000000000..1a6bcd7104 --- /dev/null +++ b/test/test_airy_ai_prime_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/airy.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::airy_ai_prime(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::airy_ai_prime(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
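The verification loops above accept up to 10 "epsilons" of disagreement between the device and host results. boost::math::epsilon_difference(a, b) reports |a - b| scaled into multiples of machine epsilon at the operands' magnitude, so the check tolerates roughly ten representable steps. A minimal host-only illustration (values are an assumption-free consequence of the definition):

#include <iostream>
#include <limits>
#include <boost/math/special_functions/relative_difference.hpp>

int main()
{
    double a = 1.0;
    double b = a + 3 * std::numeric_limits<double>::epsilon();
    // Prints approximately 3: the values are three ULP-sized steps apart,
    // comfortably inside the "> 10" rejection threshold used by the tests.
    std::cout << boost::math::epsilon_difference(a, b) << '\n';
    return 0;
}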
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <cstdlib> +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::airy_ai_prime(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the test CUDA kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::airy_ai_prime(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_airy_ai_prime_nvrtc_double.cpp b/test/test_airy_ai_prime_nvrtc_double.cpp new file mode 100644 index 0000000000..1012571761 --- /dev/null +++ b/test/test_airy_ai_prime_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <iostream> +#include <random> +#include <exception> +#include <cmath> +#include <cstdlib> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <cuda/std/type_traits> +#include <boost/math/special_functions/airy.hpp> +extern "C" __global__ +void test_airy_ai_prime_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_ai_prime(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_ai_prime_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_ai_prime_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_ai_prime_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); +
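// The second input buffer is never read by this unary kernel; it is filled and copied anyway, presumably so all the NVRTC tests can share one (in1, in2, out, n) launch signature. +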
h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_ai_prime(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_ai_prime_nvrtc_float.cpp b/test/test_airy_ai_prime_nvrtc_float.cpp new file mode 100644 index 0000000000..c96e044497 --- /dev/null +++ b/test/test_airy_ai_prime_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <iostream> +#include <random> +#include <exception> +#include <cmath> +#include <cstdlib> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <cuda/std/type_traits> +#include <boost/math/special_functions/airy.hpp> +extern "C" __global__ +void test_airy_ai_prime_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_ai_prime(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_ai_prime_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_ai_prime_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_ai_prime_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); +
h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_ai_prime(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_bi_double.cu b/test/test_airy_bi_double.cu new file mode 100644 index 0000000000..60001a3fe5 --- /dev/null +++ b/test/test_airy_bi_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <cstdlib> +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::airy_bi(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the test CUDA kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::airy_bi(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_airy_bi_float.cu b/test/test_airy_bi_float.cu new file mode 100644 index 0000000000..ed729bfe78 --- /dev/null +++ b/test/test_airy_bi_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <cstdlib> +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::airy_bi(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the test CUDA kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::airy_bi(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_airy_bi_nvrtc_double.cpp b/test/test_airy_bi_nvrtc_double.cpp new file mode 100644 index 0000000000..f69e239163 --- /dev/null +++ b/test/test_airy_bi_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <iostream> +#include <random> +#include <exception> +#include <cmath> +#include <cstdlib> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <cuda/std/type_traits> +#include <boost/math/special_functions/airy.hpp> +extern "C" __global__ +void test_airy_bi_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_bi(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_bi_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_bi_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_bi_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); + h_in2[i] =
static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_bi(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_bi_nvrtc_float.cpp b/test/test_airy_bi_nvrtc_float.cpp new file mode 100644 index 0000000000..c28a5f5eb0 --- /dev/null +++ b/test/test_airy_bi_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <iostream> +#include <random> +#include <exception> +#include <cmath> +#include <cstdlib> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <cuda/std/type_traits> +#include <boost/math/special_functions/airy.hpp> +extern "C" __global__ +void test_airy_bi_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_bi(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_bi_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_bi_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_bi_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); +
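// A fixed seed (42 above) keeps the generated inputs, and therefore any reported mismatch, reproducible from run to run. +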
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_bi(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_bi_prime_double.cu b/test/test_airy_bi_prime_double.cu new file mode 100644 index 0000000000..a73e43f254 --- /dev/null +++ b/test/test_airy_bi_prime_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <cstdlib> +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::airy_bi_prime(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the test CUDA kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::airy_bi_prime(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_airy_bi_prime_float.cu b/test/test_airy_bi_prime_float.cu new file mode 100644 index 0000000000..36874bccc7 --- /dev/null +++ b/test/test_airy_bi_prime_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <cstdlib> +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::airy_bi_prime(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the test CUDA kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::airy_bi_prime(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_airy_bi_prime_nvrtc_double.cpp b/test/test_airy_bi_prime_nvrtc_double.cpp new file mode 100644 index 0000000000..802f63a292 --- /dev/null +++ b/test/test_airy_bi_prime_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <iostream> +#include <random> +#include <exception> +#include <cmath> +#include <cstdlib> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <cuda/std/type_traits> +#include <boost/math/special_functions/airy.hpp> +extern "C" __global__ +void test_airy_bi_prime_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_bi_prime(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_bi_prime_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_bi_prime_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_bi_prime_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); +
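// Draws as large as 1000 push airy_bi_prime well into overflow; the verification loop below skips any reference value that is not finite. +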
h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_bi_prime(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_airy_bi_prime_nvrtc_float.cpp b/test/test_airy_bi_prime_nvrtc_float.cpp new file mode 100644 index 0000000000..e96aa48b97 --- /dev/null +++ b/test/test_airy_bi_prime_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/special_functions/airy.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <iostream> +#include <random> +#include <exception> +#include <cmath> +#include <cstdlib> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <cuda/std/type_traits> +#include <boost/math/special_functions/airy.hpp> +extern "C" __global__ +void test_airy_bi_prime_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::airy_bi_prime(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_airy_bi_prime_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_airy_bi_prime_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_airy_bi_prime_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); +
h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::airy_bi_prime(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine.cpp b/test/test_arcsine.cpp index 0c2d847a96..d1ac803080 100644 --- a/test/test_arcsine.cpp +++ b/test/test_arcsine.cpp @@ -10,7 +10,9 @@ // Tests for the arcsine Distribution. +#ifndef SYCL_LANGUAGE_VERSION #include <pch_light.hpp> // Must be 1st include, and include_directory /libs/math/src/tr1/ is needed. +#endif #ifdef _MSC_VER # pragma warning(disable: 4127) // Conditional expression is constant. @@ -20,7 +22,6 @@ #include <boost/math/concepts/real_concept.hpp> // for real_concept. using ::boost::math::concepts::real_concept; -#include <boost/math/concepts/real_concept.hpp> // for real_concept. #include <boost/math/distributions/arcsine.hpp> // for arcsine_distribution. using boost::math::arcsine_distribution; @@ -42,6 +43,10 @@ using std::endl; #include <limits> using std::numeric_limits; +#if defined(BOOST_CHECK_THROW) && defined(BOOST_MATH_NO_EXCEPTIONS) +# undef BOOST_CHECK_THROW +# define BOOST_CHECK_THROW(x, y) +#endif template <class RealType> void test_ignore_policy(RealType) @@ -272,7 +277,7 @@ void test_spots(RealType) BOOST_CHECK_EQUAL(kurtosis_excess(arcsine_01), -1.5); // 3/2 BOOST_CHECK_EQUAL(support(arcsine_01).first, 0); // BOOST_CHECK_EQUAL(range(arcsine_01).first, 0); // - BOOST_MATH_CHECK_THROW(mode(arcsine_01), std::domain_error); // Two modes at x_min and x_max, so throw instead. + BOOST_CHECK_THROW(mode(arcsine_01), std::domain_error); // Two modes at x_min and x_max, so throw instead. // PDF // pdf of x = 1/4 is same as reflected value at x = 3/4. @@ -290,11 +295,13 @@ void test_spots(RealType) BOOST_CHECK_CLOSE_FRACTION(pdf(arcsine_01, 0.999999), static_cast<RealType>(318.31004533885312973989414360099118178698415543136L), 100000 * tolerance);// Even less accurate. // Extreme x.
+ #ifndef BOOST_MATH_ENABLE_SYCL if (std::numeric_limits<RealType>::has_infinity) { // BOOST_CHECK_EQUAL(pdf(arcsine_01, 0), informax()); // BOOST_CHECK_EQUAL(pdf(arcsine_01, 1), informax()); // } + #endif BOOST_CHECK_CLOSE_FRACTION(pdf(arcsine_01, tolerance), 1 /(sqrt(tolerance) * boost::math::constants::pi<RealType>()), 2 * tolerance); // @@ -439,56 +446,56 @@ void test_spots(RealType) BOOST_CHECK_CLOSE_FRACTION(quantile(complement(as_m2m1, static_cast<RealType>(0.85643370687129372924905811522494428117838480010259L))), -static_cast<RealType>(1.95L), 4 * tolerance); // Tests that should throw: - BOOST_MATH_CHECK_THROW(mode(arcsine_distribution<RealType>(static_cast<RealType>(0), static_cast<RealType>(1))), std::domain_error); + BOOST_CHECK_THROW(mode(arcsine_distribution<RealType>(static_cast<RealType>(0), static_cast<RealType>(1))), std::domain_error); // mode is undefined, and must throw domain_error! - BOOST_MATH_CHECK_THROW( // For various bad arguments. + BOOST_CHECK_THROW( // For various bad arguments. pdf( arcsine_distribution<RealType>(static_cast<RealType>(+1), static_cast<RealType>(-1)), // min_x > max_x static_cast<RealType>(1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( arcsine_distribution<RealType>(static_cast<RealType>(1), static_cast<RealType>(0)), // bad constructor parameters. static_cast<RealType>(1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( arcsine_distribution<RealType>(static_cast<RealType>(1), static_cast<RealType>(-1)), // bad constructor parameters. static_cast<RealType>(1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( arcsine_distribution<RealType>(static_cast<RealType>(1), static_cast<RealType>(1)), // equal constructor parameters. static_cast<RealType>(-1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( arcsine_distribution<RealType>(static_cast<RealType>(0), static_cast<RealType>(1)), // bad x > 1. static_cast<RealType>(999)), std::domain_error); - BOOST_MATH_CHECK_THROW( // For various bad arguments. + BOOST_CHECK_THROW( // For various bad arguments. logpdf( arcsine_distribution<RealType>(static_cast<RealType>(+1), static_cast<RealType>(-1)), // min_x > max_x static_cast<RealType>(1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( logpdf( arcsine_distribution<RealType>(static_cast<RealType>(1), static_cast<RealType>(0)), // bad constructor parameters. static_cast<RealType>(1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( logpdf( arcsine_distribution<RealType>(static_cast<RealType>(1), static_cast<RealType>(-1)), // bad constructor parameters. static_cast<RealType>(1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( logpdf( arcsine_distribution<RealType>(static_cast<RealType>(1), static_cast<RealType>(1)), // equal constructor parameters. static_cast<RealType>(-1)), std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( logpdf( arcsine_distribution<RealType>(static_cast<RealType>(0), static_cast<RealType>(1)), // bad x > 1. static_cast<RealType>(999)), std::domain_error); @@ -496,26 +503,26 @@ void test_spots(RealType) // Checks on things that are errors. // Construction with 'bad' parameters. - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(+1, -1), std::domain_error); // max < min. - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(+1, 0), std::domain_error); // max < min. + BOOST_CHECK_THROW(arcsine_distribution<RealType>(+1, -1), std::domain_error); // max < min. + BOOST_CHECK_THROW(arcsine_distribution<RealType>(+1, 0), std::domain_error); // max < min.
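+ // A default-constructed arcsine_distribution<> has support [0, 1], so the negative arguments passed below are out of domain.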
arcsine_distribution<> dist; - BOOST_MATH_CHECK_THROW(pdf(dist, -1), std::domain_error); - BOOST_MATH_CHECK_THROW(logpdf(dist, -1), std::domain_error); - BOOST_MATH_CHECK_THROW(cdf(dist, -1), std::domain_error); - BOOST_MATH_CHECK_THROW(cdf(complement(dist, -1)), std::domain_error); - BOOST_MATH_CHECK_THROW(quantile(dist, -1), std::domain_error); - BOOST_MATH_CHECK_THROW(quantile(complement(dist, -1)), std::domain_error); - BOOST_MATH_CHECK_THROW(quantile(dist, -1), std::domain_error); - BOOST_MATH_CHECK_THROW(quantile(complement(dist, -1)), std::domain_error); + BOOST_CHECK_THROW(pdf(dist, -1), std::domain_error); + BOOST_CHECK_THROW(logpdf(dist, -1), std::domain_error); + BOOST_CHECK_THROW(cdf(dist, -1), std::domain_error); + BOOST_CHECK_THROW(cdf(complement(dist, -1)), std::domain_error); + BOOST_CHECK_THROW(quantile(dist, -1), std::domain_error); + BOOST_CHECK_THROW(quantile(complement(dist, -1)), std::domain_error); + BOOST_CHECK_THROW(quantile(dist, -1), std::domain_error); + BOOST_CHECK_THROW(quantile(complement(dist, -1)), std::domain_error); // Various combinations of bad constructor and member function parameters. - BOOST_MATH_CHECK_THROW(pdf(boost::math::arcsine_distribution<RealType>(0, 1), -1), std::domain_error); - BOOST_MATH_CHECK_THROW(pdf(boost::math::arcsine_distribution<RealType>(-1, 1), +2), std::domain_error); - BOOST_MATH_CHECK_THROW(logpdf(boost::math::arcsine_distribution<RealType>(0, 1), -1), std::domain_error); - BOOST_MATH_CHECK_THROW(logpdf(boost::math::arcsine_distribution<RealType>(-1, 1), +2), std::domain_error); - BOOST_MATH_CHECK_THROW(quantile(boost::math::arcsine_distribution<RealType>(1, 1), -1), std::domain_error); - BOOST_MATH_CHECK_THROW(quantile(boost::math::arcsine_distribution<RealType>(1, 1), 2), std::domain_error); + BOOST_CHECK_THROW(pdf(boost::math::arcsine_distribution<RealType>(0, 1), -1), std::domain_error); + BOOST_CHECK_THROW(pdf(boost::math::arcsine_distribution<RealType>(-1, 1), +2), std::domain_error); + BOOST_CHECK_THROW(logpdf(boost::math::arcsine_distribution<RealType>(0, 1), -1), std::domain_error); + BOOST_CHECK_THROW(logpdf(boost::math::arcsine_distribution<RealType>(-1, 1), +2), std::domain_error); + BOOST_CHECK_THROW(quantile(boost::math::arcsine_distribution<RealType>(1, 1), -1), std::domain_error); + BOOST_CHECK_THROW(quantile(boost::math::arcsine_distribution<RealType>(1, 1), 2), std::domain_error); // No longer allow any parameter to be NaN or inf, so all these tests should throw. if (std::numeric_limits<RealType>::has_quiet_NaN) @@ -523,23 +530,23 @@ // Attempt to construct from non-finite parameters should throw.
RealType nan = std::numeric_limits<RealType>::quiet_NaN(); #ifndef BOOST_NO_EXCEPTIONS - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType> w(nan), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType> w(1, nan), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType> w(nan, 1), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType> w(nan), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType> w(1, nan), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType> w(nan, 1), std::domain_error); #else - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(nan), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(1, nan), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(nan, 1), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType>(nan), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType>(1, nan), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType>(nan, 1), std::domain_error); #endif arcsine_distribution<RealType> w(RealType(-1), RealType(+1)); // NaN parameters to member functions should throw. - BOOST_MATH_CHECK_THROW(pdf(w, +nan), std::domain_error); // x = NaN - BOOST_MATH_CHECK_THROW(logpdf(w, +nan), std::domain_error); // x = NaN - BOOST_MATH_CHECK_THROW(cdf(w, +nan), std::domain_error); // x = NaN - BOOST_MATH_CHECK_THROW(cdf(complement(w, +nan)), std::domain_error); // x = + nan - BOOST_MATH_CHECK_THROW(quantile(w, +nan), std::domain_error); // p = + nan - BOOST_MATH_CHECK_THROW(quantile(complement(w, +nan)), std::domain_error); // p = + nan + BOOST_CHECK_THROW(pdf(w, +nan), std::domain_error); // x = NaN + BOOST_CHECK_THROW(logpdf(w, +nan), std::domain_error); // x = NaN + BOOST_CHECK_THROW(cdf(w, +nan), std::domain_error); // x = NaN + BOOST_CHECK_THROW(cdf(complement(w, +nan)), std::domain_error); // x = + nan + BOOST_CHECK_THROW(quantile(w, +nan), std::domain_error); // p = + nan + BOOST_CHECK_THROW(quantile(complement(w, +nan)), std::domain_error); // p = + nan } // has_quiet_NaN if (std::numeric_limits<RealType>::has_infinity) @@ -547,27 +554,27 @@ // Attempt to construct from non-finite should throw. RealType inf = std::numeric_limits<RealType>::infinity(); #ifndef BOOST_NO_EXCEPTIONS - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType> w(inf), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType> w(1, inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType> w(inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType> w(1, inf), std::domain_error); #else - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(inf), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution<RealType>(1, inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType>(inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution<RealType>(1, inf), std::domain_error); #endif // Infinite parameters to member functions should throw.
arcsine_distribution w(RealType(0), RealType(1)); #ifndef BOOST_NO_EXCEPTIONS - BOOST_MATH_CHECK_THROW(arcsine_distribution w(inf), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution w(1, inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution w(inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution w(1, inf), std::domain_error); #else - BOOST_MATH_CHECK_THROW(arcsine_distribution(inf), std::domain_error); - BOOST_MATH_CHECK_THROW(arcsine_distribution(1, inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution(inf), std::domain_error); + BOOST_CHECK_THROW(arcsine_distribution(1, inf), std::domain_error); #endif - BOOST_MATH_CHECK_THROW(pdf(w, +inf), std::domain_error); // x = inf - BOOST_MATH_CHECK_THROW(logpdf(w, +inf), std::domain_error); // x = inf - BOOST_MATH_CHECK_THROW(cdf(w, +inf), std::domain_error); // x = inf - BOOST_MATH_CHECK_THROW(cdf(complement(w, +inf)), std::domain_error); // x = + inf - BOOST_MATH_CHECK_THROW(quantile(w, +inf), std::domain_error); // p = + inf - BOOST_MATH_CHECK_THROW(quantile(complement(w, +inf)), std::domain_error); // p = + inf + BOOST_CHECK_THROW(pdf(w, +inf), std::domain_error); // x = inf + BOOST_CHECK_THROW(logpdf(w, +inf), std::domain_error); // x = inf + BOOST_CHECK_THROW(cdf(w, +inf), std::domain_error); // x = inf + BOOST_CHECK_THROW(cdf(complement(w, +inf)), std::domain_error); // x = + inf + BOOST_CHECK_THROW(quantile(w, +inf), std::domain_error); // p = + inf + BOOST_CHECK_THROW(quantile(complement(w, +inf)), std::domain_error); // p = + inf } // has_infinity // Error handling checks: @@ -601,7 +608,7 @@ void test_spots(RealType) BOOST_CHECK_EQUAL(kurtosis_excess(as), -1.5); // 3/2 BOOST_CHECK_EQUAL(support(as).first, 0); // BOOST_CHECK_EQUAL(range(as).first, 0); // - BOOST_MATH_CHECK_THROW(mode(as), std::domain_error); // Two modes at x_min and x_max, so throw instead. + BOOST_CHECK_THROW(mode(as), std::domain_error); // Two modes at x_min and x_max, so throw instead. // (Parameter value, arbitrarily zero, only communicates the floating point type). test_spots(0.0F); // Test float. diff --git a/test/test_arcsine_cdf_double.cu b/test/test_arcsine_cdf_double.cu new file mode 100644 index 0000000000..3ac9e22cd0 --- /dev/null +++ b/test/test_arcsine_cdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_arcsine_cdf_float.cu b/test/test_arcsine_cdf_float.cu new file mode 100644 index 0000000000..cc73ce95bd --- /dev/null +++ b/test/test_arcsine_cdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_arcsine_cdf_nvrtc_double.cpp b/test/test_arcsine_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..58b6b8297f --- /dev/null +++ b/test/test_arcsine_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::arcsine_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::arcsine_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine_cdf_nvrtc_float.cpp b/test/test_arcsine_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..3f8b04bb0b --- /dev/null +++ b/test/test_arcsine_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::arcsine_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] 
= static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::arcsine_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine_pdf_double.cu b/test/test_arcsine_pdf_double.cu new file mode 100644 index 0000000000..8f45017ba8 --- /dev/null +++ b/test/test_arcsine_pdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_arcsine_pdf_float.cu b/test/test_arcsine_pdf_float.cu new file mode 100644 index 0000000000..c236b7876f --- /dev/null +++ b/test/test_arcsine_pdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_arcsine_pdf_nvrtc_double.cpp b/test/test_arcsine_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..c76b47f883 --- /dev/null +++ b/test/test_arcsine_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::arcsine_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::arcsine_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine_pdf_nvrtc_float.cpp b/test/test_arcsine_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..e9145a1624 --- /dev/null +++ b/test/test_arcsine_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::arcsine_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] 
= static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::arcsine_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine_quan_double.cu b/test/test_arcsine_quan_double.cu new file mode 100644 index 0000000000..a457370635 --- /dev/null +++ b/test/test_arcsine_quan_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_arcsine_quan_float.cu b/test/test_arcsine_quan_float.cu new file mode 100644 index 0000000000..fd8cd11fcc --- /dev/null +++ b/test/test_arcsine_quan_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::arcsine_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::arcsine_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_arcsine_quantile_nvrtc_double.cpp b/test/test_arcsine_quantile_nvrtc_double.cpp new file mode 100644 index 0000000000..ba8e2e5df5 --- /dev/null +++ b/test/test_arcsine_quantile_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::arcsine_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::arcsine_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine_quantile_nvrtc_float.cpp b/test/test_arcsine_quantile_nvrtc_float.cpp new file mode 100644 index 0000000000..1fd2e4884a --- /dev/null +++ b/test/test_arcsine_quantile_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::arcsine_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::arcsine_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_arcsine_range_support_double.cu b/test/test_arcsine_range_support_double.cu new file mode 100644 index 0000000000..b3fb575faa --- /dev/null +++ b/test/test_arcsine_range_support_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <vector> +#include <exception> +#include <boost/math/distributions/arcsine.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type* in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = range(boost::math::arcsine_distribution<float_type>(in1[i])).first + support(boost::math::arcsine_distribution<float_type>(in1[i])).second; + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(range(boost::math::arcsine_distribution<float_type>(input_vector1[i])).first + support(boost::math::arcsine_distribution<float_type>(input_vector1[i])).second); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_arcsine_range_support_float.cu b/test/test_arcsine_range_support_float.cu new file mode 100644 index 0000000000..d207d0598e --- /dev/null +++ b/test/test_arcsine_range_support_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <vector> +#include <exception> +#include <boost/math/distributions/arcsine.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type* in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = range(boost::math::arcsine_distribution<float_type>(in1[i])).first + support(boost::math::arcsine_distribution<float_type>(in1[i])).second; + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(range(boost::math::arcsine_distribution<float_type>(input_vector1[i])).first + support(boost::math::arcsine_distribution<float_type>(input_vector1[i])).second); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_autodiff_2.cpp b/test/test_autodiff_2.cpp index 79ee24ba0d..7315e7dc1b 100644 --- a/test/test_autodiff_2.cpp +++ b/test/test_autodiff_2.cpp @@ -522,6 +522,17 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(asinh_test, T, bin_float_types) { -39 / (16 * boost::math::constants::root_two<T>()), eps); } +template <typename T> +static T atan2_wrap(T x, T y) +{ + return atan2(x, y); +} + +static long double atan2_wrap(long double x, long double y) +{ + return std::atan2(x, y); +} + BOOST_AUTO_TEST_CASE_TEMPLATE(atan2_function, T, all_float_types) { using test_constants = test_constants_t<T>; using std::atan2; @@ -536,7 +547,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(atan2_function, T, all_float_types) { auto y = y_sampler.next(); auto autodiff_v = atan2(make_fvar<T, m>(x), make_fvar<T, m>(y)); - auto anchor_v = atan2(x, y); + auto anchor_v = atan2_wrap(x, y); BOOST_CHECK_CLOSE(autodiff_v, anchor_v, 5000 * test_constants::pct_epsilon()); } diff --git a/test/test_bernoulli.cpp b/test/test_bernoulli.cpp index d8c663399a..8513cec36d 100644 --- a/test/test_bernoulli.cpp +++ b/test/test_bernoulli.cpp @@ -2,6 +2,7 @@ // Copyright John Maddock 2006. // Copyright Paul A. Bristow 2007, 2012. +// Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. @@ -22,7 +23,7 @@ #include <boost/math/concepts/real_concept.hpp> // for real_concept using ::boost::math::concepts::real_concept; -#include <boost/math/tools/test.hpp> +#include "../include_private/boost/math/tools/test.hpp" #include <boost/math/distributions/bernoulli.hpp> // for bernoulli_distribution using boost::math::bernoulli_distribution; @@ -74,6 +75,7 @@ void test_spots(RealType) BOOST_CHECK_EQUAL(bernoulli_distribution<RealType>(static_cast<RealType>(0.1L)).success_fraction(), static_cast<RealType>(0.1L)); BOOST_CHECK_EQUAL(bernoulli_distribution<RealType>(static_cast<RealType>(0.9L)).success_fraction(), static_cast<RealType>(0.9L)); +#ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_MATH_CHECK_THROW( // Constructor success_fraction outside 0 to 1. bernoulli_distribution<RealType>(static_cast<RealType>(2)), std::domain_error); BOOST_MATH_CHECK_THROW( @@ -86,7 +88,8 @@ void test_spots(RealType) BOOST_MATH_CHECK_THROW( pdf( // pdf k neither 0 nor 1. bernoulli_distribution<RealType>(static_cast<RealType>(0.25L)), static_cast<RealType>(2)), std::domain_error); - +#endif + BOOST_CHECK_EQUAL( pdf( // OK k (or n) bernoulli_distribution<RealType>(static_cast<RealType>(0.5L)), static_cast<RealType>(0)), @@ -134,6 +137,7 @@ void test_spots(RealType) static_cast<RealType>(5.11111111111111111111111111111111111111111111L), tolerance); +#ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_MATH_CHECK_THROW( quantile( bernoulli_distribution<RealType>(static_cast<RealType>(2)), // prob >1 @@ -154,6 +158,7 @@ bernoulli_distribution<RealType>(static_cast<RealType>(0.5L)), // k < 0 static_cast<RealType>(2)), std::domain_error ); +#endif BOOST_CHECK_CLOSE_FRACTION( cdf( @@ -217,6 +222,7 @@ // Checks for 'bad' parameters. // Construction. + #ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_MATH_CHECK_THROW(bernoulli_distribution<RealType>(-1), std::domain_error); // p outside 0 to 1. BOOST_MATH_CHECK_THROW(bernoulli_distribution<RealType>(+2), std::domain_error); // p outside 0 to 1. 
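+   // The throw-based checks above are compiled out when BOOST_MATH_NO_EXCEPTIONS is defined (e.g. for device or freestanding builds).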
@@ -269,7 +275,7 @@ void test_spots(RealType) BOOST_MATH_CHECK_THROW(quantile(w, +inf), std::domain_error); // p = + inf BOOST_MATH_CHECK_THROW(quantile(complement(w, +inf)), std::domain_error); // p = + inf } // has_infinity - + #endif } // template <class RealType> void test_spots(RealType) BOOST_AUTO_TEST_CASE( test_main ) @@ -302,7 +308,9 @@ BOOST_AUTO_TEST_CASE( test_main ) // (Parameter value, arbitrarily zero, only communicates the floating point type). test_spots(0.0F); // Test float. test_spots(0.0); // Test double. +#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS test_spots(0.0L); // Test long double. +#endif #if !BOOST_WORKAROUND(BOOST_BORLANDC, BOOST_TESTED_AT(0x582)) && !defined(BOOST_MATH_NO_REAL_CONCEPT_TESTS) test_spots(boost::math::concepts::real_concept(0.)); // Test real concept. #endif diff --git a/test/test_bernoulli_cdf_double.cu b/test/test_bernoulli_cdf_double.cu new file mode 100644 index 0000000000..1a6dce645e --- /dev/null +++ b/test/test_bernoulli_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <vector> +#include <exception> +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::bernoulli_distribution<float_type>(in1[i]), static_cast<float_type>(1)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::bernoulli_distribution<float_type>(input_vector1[i]), static_cast<float_type>(1))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_bernoulli_cdf_float.cu b/test/test_bernoulli_cdf_float.cu new file mode 100644 index 0000000000..998f247361 --- /dev/null +++ b/test/test_bernoulli_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <vector> +#include <exception> +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::bernoulli_distribution<float_type>(in1[i]), static_cast<float_type>(1)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::bernoulli_distribution<float_type>(input_vector1[i]), static_cast<float_type>(1))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_bernoulli_cdf_nvrtc_double.cpp b/test/test_bernoulli_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..887c3430a0 --- /dev/null +++ b/test/test_bernoulli_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/math/special_functions/fpclassify.hpp> +#include <iostream> +#include <cstdlib> +#include <cmath> +#include <random> +#include <exception> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <boost/math/distributions/bernoulli.hpp> +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::bernoulli_distribution<float_type>(), round(in1[i])); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", 
"--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::bernoulli_distribution(), round(h_in1[i])); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bernoulli_cdf_nvrtc_float.cpp b/test/test_bernoulli_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..1a38c43480 --- /dev/null +++ b/test/test_bernoulli_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. 
+// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/math/special_functions/fpclassify.hpp> +#include <iostream> +#include <cstdlib> +#include <cmath> +#include <random> +#include <exception> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <boost/math/distributions/bernoulli.hpp> +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::bernoulli_distribution<float_type>(), round(in1[i])); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + 
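+        // A fixed seed keeps the pseudo-random inputs identical from run to run, so host and device results stay comparable.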
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); + h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::bernoulli_distribution<float_type>(), round(h_in1[i])); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n  Serial: " << res + << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bernoulli_constants.cpp b/test/test_bernoulli_constants.cpp index 6d73d82a4c..0c5c81b62f 100644 --- a/test/test_bernoulli_constants.cpp +++ b/test/test_bernoulli_constants.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include "table_type.hpp" #include #include #include @@ -22,114 +22,114 @@ void test(const char* name) { std::cout << "Testing type " << name << ":\n"; - static const typename table_type::type data[] = + static const typename table_type::type data[] = { /* First 50 from 2 to 100 inclusive: */ /* TABLE[N[BernoulliB[n], 200], {n,2,100,2}] */ - SC_(0.1666666666666666666666666666666666666666), - SC_(-0.0333333333333333333333333333333333333333), - SC_(0.0238095238095238095238095238095238095238), - SC_(-0.0333333333333333333333333333333333333333), - SC_(0.0757575757575757575757575757575757575757), - SC_(-0.2531135531135531135531135531135531135531), - SC_(1.1666666666666666666666666666666666666666), - SC_(-7.0921568627450980392156862745098039215686), - SC_(54.9711779448621553884711779448621553884711), - SC_(-529.1242424242424242424242424242424242424242), - SC_(6192.1231884057971014492753623188405797101449), - SC_(-86580.2531135531135531135531135531135531135531), - SC_(1.4255171666666666666666666666666666666666e6), - SC_(-2.7298231067816091954022988505747126436781e7), - SC_(6.0158087390064236838430386817483591677140e8), - SC_(-1.5116315767092156862745098039215686274509e10), - SC_(4.2961464306116666666666666666666666666666e11), - SC_(-1.3711655205088332772159087948561632772159e13), - SC_(4.8833231897359316666666666666666666666666e14), - SC_(-1.9296579341940068148632668144863266814486e16), - SC_(8.4169304757368261500055370985603543743078e17), - SC_(-4.0338071854059455413076811594202898550724e19), - SC_(2.1150748638081991605601453900709219858156e21), - SC_(-1.2086626522296525934602731193708252531781e23), - SC_(7.5008667460769643668557200757575757575757e24), - SC_(-5.0387781014810689141378930305220125786163e26), - SC_(3.6528776484818123335110430842971177944862e28), - SC_(-2.8498769302450882226269146432910678160919e30), - SC_(2.3865427499683627644645981919219214971751e32), - SC_(-2.1399949257225333665810744765191097392674e34), - SC_(2.0500975723478097569921733095672310251666e36), - SC_(-2.0938005911346378409095185290027970184709e38), - SC_(2.2752696488463515559649260352769264581469e40), - SC_(-2.6257710286239576047303049736158202081449e42), - SC_(3.2125082102718032518204792304264985243521e44), - SC_(-4.1598278166794710913917074495262358936689e46), - SC_(5.6920695482035280023883456219121058644480e48), - SC_(-8.2183629419784575692290653468617333014550e50), - SC_(1.2502904327166993016732339829702895524177e53), - SC_(-2.0015583233248370274925329198813298768724e55), - SC_(3.3674982915364374233396676903338753016219e57), - SC_(-5.9470970503135447718660496844051540840579e59), - SC_(1.1011910323627977559564130790437691604630e62), - SC_(-2.1355259545253501188658385019041065678973e64), - SC_(4.3328896986641192419616613059379206218451e66), - SC_(-9.1885528241669328226200555215501897138960e68), - SC_(2.0346896776329074493455027990220020065975e71), - SC_(-4.7003833958035731078575255535006060654596e73), - SC_(1.1318043445484249270675186257733934267890e76), + SC_(0.1666666666666666666666666666666666666666), + SC_(-0.0333333333333333333333333333333333333333), + 
SC_(0.0238095238095238095238095238095238095238), + SC_(-0.0333333333333333333333333333333333333333), + SC_(0.0757575757575757575757575757575757575757), + SC_(-0.2531135531135531135531135531135531135531), + SC_(1.1666666666666666666666666666666666666666), + SC_(-7.0921568627450980392156862745098039215686), + SC_(54.9711779448621553884711779448621553884711), + SC_(-529.1242424242424242424242424242424242424242), + SC_(6192.1231884057971014492753623188405797101449), + SC_(-86580.2531135531135531135531135531135531135531), + SC_(1.4255171666666666666666666666666666666666e6), + SC_(-2.7298231067816091954022988505747126436781e7), + SC_(6.0158087390064236838430386817483591677140e8), + SC_(-1.5116315767092156862745098039215686274509e10), + SC_(4.2961464306116666666666666666666666666666e11), + SC_(-1.3711655205088332772159087948561632772159e13), + SC_(4.8833231897359316666666666666666666666666e14), + SC_(-1.9296579341940068148632668144863266814486e16), + SC_(8.4169304757368261500055370985603543743078e17), + SC_(-4.0338071854059455413076811594202898550724e19), + SC_(2.1150748638081991605601453900709219858156e21), + SC_(-1.2086626522296525934602731193708252531781e23), + SC_(7.5008667460769643668557200757575757575757e24), + SC_(-5.0387781014810689141378930305220125786163e26), + SC_(3.6528776484818123335110430842971177944862e28), + SC_(-2.8498769302450882226269146432910678160919e30), + SC_(2.3865427499683627644645981919219214971751e32), + SC_(-2.1399949257225333665810744765191097392674e34), + SC_(2.0500975723478097569921733095672310251666e36), + SC_(-2.0938005911346378409095185290027970184709e38), + SC_(2.2752696488463515559649260352769264581469e40), + SC_(-2.6257710286239576047303049736158202081449e42), + SC_(3.2125082102718032518204792304264985243521e44), + SC_(-4.1598278166794710913917074495262358936689e46), + SC_(5.6920695482035280023883456219121058644480e48), + SC_(-8.2183629419784575692290653468617333014550e50), + SC_(1.2502904327166993016732339829702895524177e53), + SC_(-2.0015583233248370274925329198813298768724e55), + SC_(3.3674982915364374233396676903338753016219e57), + SC_(-5.9470970503135447718660496844051540840579e59), + SC_(1.1011910323627977559564130790437691604630e62), + SC_(-2.1355259545253501188658385019041065678973e64), + SC_(4.3328896986641192419616613059379206218451e66), + SC_(-9.1885528241669328226200555215501897138960e68), + SC_(2.0346896776329074493455027990220020065975e71), + SC_(-4.7003833958035731078575255535006060654596e73), + SC_(1.1318043445484249270675186257733934267890e76), SC_(-2.8382249570693706959264156336481764738284e78), /* next 50 from 102 to 200: */ /* TABLE[N[BernoulliB[n], 200], {n,102,200,2}] */ - SC_(7.4064248979678850629750827140920984176879e80), - SC_(-2.0096454802756604483465619672715363186867e83), - SC_(5.6657170050805941445719346030519356961419e85), - SC_(-1.6584511154136216915823713374319912301494e88), - SC_(5.0368859950492377419289421915180154812442e90), - SC_(-1.5861468237658186369363401572966438782740e93), - SC_(5.1756743617545626984073240682507122561240e95), - SC_(-1.7488921840217117339690025877618159145141e98), - SC_(6.1160519994952185255824525264264167780767e100), - SC_(-2.2122776912707834942288323456712932445573e103), - SC_(8.2722776798770969854221062459984595731204e105), - SC_(-3.1958925111415709583591634369180814873526e108), - SC_(1.2750082223387792982310024302926679866957e111), - SC_(-5.2500923086774133899402824624565175446919e113), - SC_(2.2301817894241625209869298198838728143738e116), - SC_(-9.7684521930955204438633513398980239301166e118), - 
SC_(4.4098361978452954272272622874813169191875e121), - SC_(-2.0508570886464088839729337727583015486456e124), - SC_(9.8214433279791277107572969602097521041491e126), - SC_(-4.8412600798208880508789196709963412761130e129), - SC_(2.4553088801480982609783467404088690399673e132), - SC_(-1.2806926804084747548782513278601785721811e135), - SC_(6.8676167104668581192101888598464400436092e137), - SC_(-3.7846468581969104694978995416379556814489e140), - SC_(2.1426101250665291550871323135148272096660e143), - SC_(-1.2456727137183695007019642961637607219458e146), - SC_(7.4345787551000152543679668394052061311780e148), - SC_(-4.5535795304641704894063333223321274876772e151), - SC_(2.8612112816858868345363847251017232522918e154), - SC_(-1.8437723552033869727688202653628785487541e157), - SC_(1.2181154536221046699501316506599521355817e160), - SC_(-8.2482187185314121548481845729689344730141e162), - SC_(5.7225877937832943329651649814297861591868e165), - SC_(-4.0668530525059104726767969383115865560219e168), - SC_(2.9596092064642050062875269581585187042637e171), - SC_(-2.2049522565189457509031175227344598483637e174), - SC_(1.6812597072889599805831152515136066575446e177), - SC_(-1.3116736213556957648645280635581715300443e180), - SC_(1.0467894009478038082183285392982308964382e183), - SC_(-8.5432893578833707718598254629908277459327e185), - SC_(7.1287821322486542352288406677143822472124e188), - SC_(-6.0802931455535899300084711868647745846198e191), - SC_(5.2996776424849923930094291004324726622848e194), - SC_(-4.7194259168745862644364622901337991110376e197), - SC_(4.2928413791402981089416829654107466904552e200), - SC_(-3.9876744968232207443447765554293879510665e203), - SC_(3.7819780419358882713894418116139332789822e206), - SC_(-3.6614233683681191243685808215119734875519e209), - SC_(3.6176090272372862348855460929891408947754e212), + SC_(7.4064248979678850629750827140920984176879e80), + SC_(-2.0096454802756604483465619672715363186867e83), + SC_(5.6657170050805941445719346030519356961419e85), + SC_(-1.6584511154136216915823713374319912301494e88), + SC_(5.0368859950492377419289421915180154812442e90), + SC_(-1.5861468237658186369363401572966438782740e93), + SC_(5.1756743617545626984073240682507122561240e95), + SC_(-1.7488921840217117339690025877618159145141e98), + SC_(6.1160519994952185255824525264264167780767e100), + SC_(-2.2122776912707834942288323456712932445573e103), + SC_(8.2722776798770969854221062459984595731204e105), + SC_(-3.1958925111415709583591634369180814873526e108), + SC_(1.2750082223387792982310024302926679866957e111), + SC_(-5.2500923086774133899402824624565175446919e113), + SC_(2.2301817894241625209869298198838728143738e116), + SC_(-9.7684521930955204438633513398980239301166e118), + SC_(4.4098361978452954272272622874813169191875e121), + SC_(-2.0508570886464088839729337727583015486456e124), + SC_(9.8214433279791277107572969602097521041491e126), + SC_(-4.8412600798208880508789196709963412761130e129), + SC_(2.4553088801480982609783467404088690399673e132), + SC_(-1.2806926804084747548782513278601785721811e135), + SC_(6.8676167104668581192101888598464400436092e137), + SC_(-3.7846468581969104694978995416379556814489e140), + SC_(2.1426101250665291550871323135148272096660e143), + SC_(-1.2456727137183695007019642961637607219458e146), + SC_(7.4345787551000152543679668394052061311780e148), + SC_(-4.5535795304641704894063333223321274876772e151), + SC_(2.8612112816858868345363847251017232522918e154), + SC_(-1.8437723552033869727688202653628785487541e157), + SC_(1.2181154536221046699501316506599521355817e160), + 
SC_(-8.2482187185314121548481845729689344730141e162), + SC_(5.7225877937832943329651649814297861591868e165), + SC_(-4.0668530525059104726767969383115865560219e168), + SC_(2.9596092064642050062875269581585187042637e171), + SC_(-2.2049522565189457509031175227344598483637e174), + SC_(1.6812597072889599805831152515136066575446e177), + SC_(-1.3116736213556957648645280635581715300443e180), + SC_(1.0467894009478038082183285392982308964382e183), + SC_(-8.5432893578833707718598254629908277459327e185), + SC_(7.1287821322486542352288406677143822472124e188), + SC_(-6.0802931455535899300084711868647745846198e191), + SC_(5.2996776424849923930094291004324726622848e194), + SC_(-4.7194259168745862644364622901337991110376e197), + SC_(4.2928413791402981089416829654107466904552e200), + SC_(-3.9876744968232207443447765554293879510665e203), + SC_(3.7819780419358882713894418116139332789822e206), + SC_(-3.6614233683681191243685808215119734875519e209), + SC_(3.6176090272372862348855460929891408947754e212), SC_(-3.6470772645191354362138308865549944904868e215), }; diff --git a/test/test_bernoulli_pdf_double.cu b/test/test_bernoulli_pdf_double.cu new file mode 100644 index 0000000000..147e2f3401 --- /dev/null +++ b/test/test_bernoulli_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <vector> +#include <exception> +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::bernoulli_distribution<float_type>(in1[i]), static_cast<float_type>(1)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::bernoulli_distribution<float_type>(input_vector1[i]), static_cast<float_type>(1))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_bernoulli_pdf_float.cu b/test/test_bernoulli_pdf_float.cu new file mode 100644 index 0000000000..49eaea32f9 --- /dev/null +++ b/test/test_bernoulli_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <vector> +#include <exception> +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::bernoulli_distribution<float_type>(in1[i]), static_cast<float_type>(1)); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::bernoulli_distribution<float_type>(input_vector1[i]), static_cast<float_type>(1))); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_bernoulli_pdf_nvrtc_double.cpp b/test/test_bernoulli_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..f3e21a0951 --- /dev/null +++ b/test/test_bernoulli_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/math/special_functions/fpclassify.hpp> +#include <iostream> +#include <cstdlib> +#include <cmath> +#include <random> +#include <exception> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <boost/math/distributions/bernoulli.hpp> +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::bernoulli_distribution<float_type>(), round(in1[i])); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", 
"--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::bernoulli_distribution(), round(h_in1[i])); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bernoulli_pdf_nvrtc_float.cpp b/test/test_bernoulli_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..bf9b760168 --- /dev/null +++ b/test/test_bernoulli_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. 
+// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <nvrtc.h> +#include <cuda.h> +#include <cuda_runtime.h> + +#include <boost/math/distributions/bernoulli.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/math/special_functions/fpclassify.hpp> +#include <iostream> +#include <cstdlib> +#include <cmath> +#include <random> +#include <exception> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <boost/math/distributions/bernoulli.hpp> +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::bernoulli_distribution<float_type>(), round(in1[i])); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + 
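+        // Draws from [0, 1) are rounded to {0, 1} on the device, giving valid Bernoulli outcomes for the PDF call.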
std::uniform_real_distribution<float_type> dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); + h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::bernoulli_distribution<float_type>(), round(h_in1[i])); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n  Serial: " << res + << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bernoulli_quan_nvrtc_double.cpp b/test/test_bernoulli_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..dcdd0e1f1b --- /dev/null +++ b/test/test_bernoulli_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::bernoulli_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::bernoulli_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bernoulli_quan_nvrtc_float.cpp b/test/test_bernoulli_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..24c4923818 --- /dev/null +++ b/test/test_bernoulli_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::bernoulli_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cauchy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::bernoulli_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bernoulli_range_support_double.cu b/test/test_bernoulli_range_support_double.cu new file mode 100644 index 0000000000..ade952fca3 --- /dev/null +++ b/test/test_bernoulli_range_support_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/bernoulli.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type* in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = range(boost::math::bernoulli_distribution<float_type>(in1[i])).first + support(boost::math::bernoulli_distribution<float_type>(in1[i])).second;
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(range(boost::math::bernoulli_distribution<float_type>(input_vector1[i])).first + support(boost::math::bernoulli_distribution<float_type>(input_vector1[i])).second);
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_bernoulli_range_support_float.cu b/test/test_bernoulli_range_support_float.cu
new file mode 100644
index 0000000000..ef276b9384
--- /dev/null
+++ b/test/test_bernoulli_range_support_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type* in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = range(boost::math::bernoulli_distribution(in1[i])).first + support(boost::math::bernoulli_distribution(in1[i])).second; + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(range(boost::math::bernoulli_distribution(input_vector1[i])).first + support(boost::math::bernoulli_distribution(input_vector1[i])).second); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_bessel_i.cpp b/test/test_bessel_i.cpp index 68dcab0a5d..817569760a 100644 --- a/test/test_bessel_i.cpp +++ b/test/test_bessel_i.cpp @@ -3,7 +3,21 @@ // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#else +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false +#include +#endif + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wliteral-range" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Woverflow" +#endif + #include "test_bessel_i.hpp" // @@ -82,7 +96,11 @@ void expected_results() "linux", // platform largest_type, // test type(s) ".*Random.*", // test data group + #ifdef SYCL_LANGUAGE_VERSION + ".*", 600, 200); + #else ".*", 400, 200); // test function + #endif add_expected_result( "GNU.*", // compiler @@ -111,7 +129,11 @@ void expected_results() ".*", // platform largest_type, // test type(s) ".*", // test data group + #ifdef SYCL_LANGUAGE_VERSION + ".*", 400, 200); + #else ".*", 20, 10); // test function + #endif // // Set error rates a little higher for real_concept - // now that we use a series approximation for small z diff --git a/test/test_bessel_i.hpp b/test/test_bessel_i.hpp index 2da559f320..aa4f6a4ea3 100644 --- a/test/test_bessel_i.hpp +++ b/test/test_bessel_i.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include "functor.hpp" @@ -180,7 +181,10 @@ void test_bessel(T, const char* name) // // Special cases for full coverage: // + #ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_CHECK_THROW(boost::math::cyl_bessel_i(T(-2.5), T(-2.5)), std::domain_error); + #endif + BOOST_CHECK_EQUAL(boost::math::cyl_bessel_i(T(0), T(0)), T(1)); BOOST_CHECK_EQUAL(boost::math::cyl_bessel_i(T(10), T(0)), T(0)); BOOST_CHECK_EQUAL(boost::math::cyl_bessel_i(T(-10), T(0)), T(0)); @@ -197,10 +201,12 @@ void test_bessel(T, const char* name) } } T tolerance = boost::math::tools::epsilon() * 100; +#ifndef SYCL_LANGUAGE_VERSION if ((boost::math::tools::digits() <= std::numeric_limits::digits) && (std::numeric_limits::max_exponent > 1000)) { BOOST_CHECK_CLOSE_FRACTION(boost::math::cyl_bessel_i(T(0.5), T(710)), SC_(3.3447452278080108123142599104927325061327359278058601201179e306), tolerance); } +#endif #if LDBL_MAX_EXP >= 11356 BOOST_IF_CONSTEXPR (std::numeric_limits::max_exponent >= 11356) { diff --git a/test/test_bessel_i0_double.cu b/test/test_bessel_i0_double.cu new file mode 100644 index 0000000000..1c5d0ca14b --- /dev/null +++ b/test/test_bessel_i0_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/detail/bessel_i0.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_i0(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::detail::bessel_i0(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_i0_float.cu b/test/test_bessel_i0_float.cu
new file mode 100644
index 0000000000..39929d5481
--- /dev/null
+++ b/test/test_bessel_i0_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i0(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::detail::bessel_i0(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_i0_nvrtc_double.cpp b/test/test_bessel_i0_nvrtc_double.cpp new file mode 100644 index 0000000000..0c5db47b49 --- /dev/null +++ b/test/test_bessel_i0_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_bessel_i0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i0(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_i0_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_i0_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_i0_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_i0(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_i0_nvrtc_float.cpp b/test/test_bessel_i0_nvrtc_float.cpp new file mode 100644 index 0000000000..26d667b973 --- /dev/null +++ b/test/test_bessel_i0_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_bessel_i0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i0(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_i0_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_i0_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_i0_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_i0(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_i1_double.cu b/test/test_bessel_i1_double.cu new file mode 100644 index 0000000000..e4d6443a68 --- /dev/null +++ b/test/test_bessel_i1_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::detail::bessel_i1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_i1_float.cu b/test/test_bessel_i1_float.cu new file mode 100644 index 0000000000..12ae535428 --- /dev/null +++ b/test/test_bessel_i1_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::detail::bessel_i1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_i1_nvrtc_double.cpp b/test/test_bessel_i1_nvrtc_double.cpp new file mode 100644 index 0000000000..c270a66940 --- /dev/null +++ b/test/test_bessel_i1_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_bessel_i1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i1(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_i1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_i1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_i1_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_i1(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_i1_nvrtc_float.cpp b/test/test_bessel_i1_nvrtc_float.cpp new file mode 100644 index 0000000000..158c6a8159 --- /dev/null +++ b/test/test_bessel_i1_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_bessel_i1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_i1(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_i1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_i1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_i1_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_i1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_j.cpp b/test/test_bessel_j.cpp
index 19a5f7426e..1dd63a68a5 100644
--- a/test/test_bessel_j.cpp
+++ b/test/test_bessel_j.cpp
@@ -3,7 +3,20 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#else
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+#include <boost/math/special_functions/bessel.hpp>
+#endif
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
 
 #include "test_bessel_j.hpp"
diff --git a/test/test_bessel_j.hpp b/test/test_bessel_j.hpp
index 82106213ea..c0b719ad89 100644
--- a/test/test_bessel_j.hpp
+++ b/test/test_bessel_j.hpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -279,7 +280,9 @@ void test_bessel(T, const char* name)
    BOOST_MATH_CHECK_THROW(boost::math::sph_bessel(2, T(-2.0)), std::domain_error);
    BOOST_CHECK_EQUAL(boost::math::cyl_bessel_j(T(0), T(2.5)), boost::math::cyl_bessel_j(T(0), T(-2.5)));
    BOOST_CHECK_EQUAL(boost::math::cyl_bessel_j(T(1), T(2.5)), -boost::math::cyl_bessel_j(T(1), T(-2.5)));
+   #ifndef SYCL_LANGUAGE_VERSION
    BOOST_CHECK_CLOSE_FRACTION(boost::math::cyl_bessel_j(364, T(38.5)), SC_(1.793940496519190500748409872348034004417458734118663909894e-309), tolerance);
+   #endif
    //
    // Special cases at infinity:
    //
diff --git a/test/test_bessel_j0_double.cu b/test/test_bessel_j0_double.cu
new file mode 100644
index 0000000000..d32474d964
--- /dev/null
+++ b/test/test_bessel_j0_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j0(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::detail::bessel_j0(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_j0_float.cu b/test/test_bessel_j0_float.cu
new file mode 100644
index 0000000000..48c6b9e399
--- /dev/null
+++ b/test/test_bessel_j0_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j0(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::detail::bessel_j0(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_j0_nvrtc_double.cpp b/test/test_bessel_j0_nvrtc_double.cpp
new file mode 100644
index 0000000000..8c8b798410
--- /dev/null
+++ b/test/test_bessel_j0_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cmath>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_j0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j0(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_j0_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_j0_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
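+        // cuModuleLoadDataEx JIT-compiles the PTX emitted by NVRTC for the device
+        // behind the current context; because the kernel is declared extern "C" in
+        // the source string, its symbol is unmangled and can be looked up by name.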
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_j0_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_j0(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_j0_nvrtc_float.cpp b/test/test_bessel_j0_nvrtc_float.cpp
new file mode 100644
index 0000000000..4a54b1eaa8
--- /dev/null
+++ b/test/test_bessel_j0_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cmath>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_j0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j0(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_j0_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_j0_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_j0_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_j0(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_j1_double.cu b/test/test_bessel_j1_double.cu
new file mode 100644
index 0000000000..33a6e71b6e
--- /dev/null
+++ b/test/test_bessel_j1_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j1(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::detail::bessel_j1(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_j1_float.cu b/test/test_bessel_j1_float.cu
new file mode 100644
index 0000000000..14dd37be31
--- /dev/null
+++ b/test/test_bessel_j1_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j1(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::detail::bessel_j1(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_j1_nvrtc_double.cpp b/test/test_bessel_j1_nvrtc_double.cpp
new file mode 100644
index 0000000000..11460c11da
--- /dev/null
+++ b/test/test_bessel_j1_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cmath>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_j1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_j1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_j1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_j1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_j1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_j1_nvrtc_float.cpp b/test/test_bessel_j1_nvrtc_float.cpp
new file mode 100644
index 0000000000..8f7cc6e3fe
--- /dev/null
+++ b/test/test_bessel_j1_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cmath>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_j1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_j1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_j1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_j1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_j1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_j1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_k.cpp b/test/test_bessel_k.cpp
index f0975b46d2..6c31f5ab05 100644
--- a/test/test_bessel_k.cpp
+++ b/test/test_bessel_k.cpp
@@ -5,13 +5,26 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#else
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+#include <boost/math/special_functions/bessel.hpp>
+#endif
 
 #ifdef _MSC_VER
 #  pragma warning(disable : 4756) // overflow in constant arithmetic
 // Constants are too big for float case, but this doesn't matter for test.
 #endif
 
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
 #include "test_bessel_k.hpp"
 
 //
diff --git a/test/test_bessel_k.hpp b/test/test_bessel_k.hpp
index 22df3218f0..6a2a8179d9 100644
--- a/test/test_bessel_k.hpp
+++ b/test/test_bessel_k.hpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include "functor.hpp"
@@ -175,6 +176,7 @@ void test_bessel(T, const char* name)
    //
    // Extra test coverage:
    //
+   #ifndef SYCL_LANGUAGE_VERSION // SYCL doesn't throw
    BOOST_CHECK_THROW(boost::math::cyl_bessel_k(T(2), T(-1)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::cyl_bessel_k(T(2.2), T(-1)), std::domain_error);
    BOOST_IF_CONSTEXPR(std::numeric_limits<T>::has_infinity)
@@ -194,6 +196,7 @@ void test_bessel(T, const char* name)
    BOOST_CHECK_THROW(boost::math::cyl_bessel_k(T(-1.25), T(0)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::cyl_bessel_k(T(-1), T(0)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::cyl_bessel_k(T(1), T(0)), std::domain_error);
+   #endif
 }
diff --git a/test/test_bessel_k0_double.cu b/test/test_bessel_k0_double.cu
new file mode 100644
index 0000000000..26d0e2bffa
--- /dev/null
+++ b/test/test_bessel_k0_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_k0(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::detail::bessel_k0(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_k0_float.cu b/test/test_bessel_k0_float.cu
new file mode 100644
index 0000000000..ffe59c25bd
--- /dev/null
+++ b/test/test_bessel_k0_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_k0(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::detail::bessel_k0(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_k0_nvrtc_double.cpp b/test/test_bessel_k0_nvrtc_double.cpp
new file mode 100644
index 0000000000..d412212125
--- /dev/null
+++ b/test/test_bessel_k0_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cmath>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_k0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_k0(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k0_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_k0_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
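+        // With the driver API the launch configuration is handed straight to
+        // cuLaunchKernel further down (grid and block dimensions, zero bytes of
+        // dynamic shared memory, the default stream), and the kernel arguments
+        // travel as an array of pointers, so no <<<...>>> syntax appears here.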
checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k0_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_k0(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_k0_nvrtc_float.cpp b/test/test_bessel_k0_nvrtc_float.cpp new file mode 100644 index 0000000000..389fce21a4 --- /dev/null +++ b/test/test_bessel_k0_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_bessel_k0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_k0(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k0_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_k0_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k0_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_k0(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_k1_double.cu b/test/test_bessel_k1_double.cu new file mode 100644 index 0000000000..ed1b353d93 --- /dev/null +++ b/test/test_bessel_k1_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::detail::bessel_k1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::detail::bessel_k1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_k1_float.cu b/test/test_bessel_k1_float.cu new file mode 100644 index 0000000000..65fd802f22 --- /dev/null +++ b/test/test_bessel_k1_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::detail::bessel_k1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::detail::bessel_k1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_k1_nvrtc_double.cpp b/test/test_bessel_k1_nvrtc_double.cpp new file mode 100644 index 0000000000..1e0f1e7f4b --- /dev/null +++ b/test/test_bessel_k1_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
diff --git a/test/test_bessel_k1_nvrtc_double.cpp b/test/test_bessel_k1_nvrtc_double.cpp
new file mode 100644
index 0000000000..1e0f1e7f4b
--- /dev/null
+++ b/test/test_bessel_k1_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_k1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_k1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_k1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_k1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
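
One detail of the driver-API launch used throughout the NVRTC tests is worth calling out: cuLaunchKernel receives kernel arguments as an array of host pointers, one per kernel parameter, each pointing at the variable that holds the argument's value; for buffer parameters that is the address of the host variable holding the device pointer, not the device pointer itself. Annotated, the launch in these tests reads:

    // Each args[i] is the address of a host variable; the driver copies the
    // pointed-to value into the corresponding kernel parameter at launch.
    void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
    cuLaunchKernel(kernel,
                   numBlocks, 1, 1,  // grid dimensions (x, y, z)
                   blockSize, 1, 1,  // block dimensions (x, y, z)
                   0,                // dynamic shared memory in bytes
                   0,                // stream (0 = default stream)
                   args,             // kernel parameter array
                   0);               // extra launch options (unused)
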
diff --git a/test/test_bessel_k1_nvrtc_float.cpp b/test/test_bessel_k1_nvrtc_float.cpp
new file mode 100644
index 0000000000..1422a58869
--- /dev/null
+++ b/test/test_bessel_k1_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_k1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_k1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_k1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_k1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_kn_double.cu b/test/test_bessel_kn_double.cu
new file mode 100644
index 0000000000..d15ba73041
--- /dev/null
+++ b/test/test_bessel_kn_double.cu
@@ -0,0 +1,105 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    boost::math::policies::policy<> pol;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_kn(2, in[i], pol);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    boost::math::policies::policy<> pol;
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        results.push_back(boost::math::detail::bessel_kn(2, input_vector[i], pol));
+    }
+
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_kn_float.cu b/test/test_bessel_kn_float.cu
new file mode 100644
index 0000000000..d15ba73041
--- /dev/null
+++ b/test/test_bessel_kn_float.cu
@@ -0,0 +1,105 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    boost::math::policies::policy<> pol;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_kn(2, in[i], pol);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    boost::math::policies::policy<> pol;
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        results.push_back(boost::math::detail::bessel_kn(2, input_vector[i], pol));
+    }
+
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_bessel_kn_nvrtc_double.cpp b/test/test_bessel_kn_nvrtc_double.cpp
new file mode 100644
index 0000000000..3b581f77c3
--- /dev/null
+++ b/test/test_bessel_kn_nvrtc_double.cpp
@@ -0,0 +1,192 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_kn_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    boost::math::policies::policy<> pol;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_kn(2, in1[i], pol);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_kn_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_kn_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_kn_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        boost::math::policies::policy<> pol;
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_kn(2, h_in1[i], pol);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_kn_nvrtc_float.cpp b/test/test_bessel_kn_nvrtc_float.cpp
new file mode 100644
index 0000000000..dcc987a70a
--- /dev/null
+++ b/test/test_bessel_kn_nvrtc_float.cpp
@@ -0,0 +1,192 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_kn_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    boost::math::policies::policy<> pol;
+    if (i < numElements)
+    {
+        out[i] = boost::math::detail::bessel_kn(2, in1[i], pol);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_kn_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_bessel_kn_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_kn_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        boost::math::policies::policy<> pol;
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::detail::bessel_kn(2, h_in1[i], pol);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_bessel_y.cpp b/test/test_bessel_y.cpp
index 83c24b95f4..8251920c5b 100644
--- a/test/test_bessel_y.cpp
+++ b/test/test_bessel_y.cpp
@@ -3,7 +3,20 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#else
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+#include <boost/math/special_functions/bessel.hpp>
+#endif
+
+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Woverflow"
+#endif
 
 #include "test_bessel_y.hpp"
 
@@ -234,7 +247,11 @@ void expected_results()
       ".*",                          // platform
       largest_type,                  // test type(s)
      ".*(Y[nv]|y).*Random.*",       // test data group
+      #ifdef SYCL_LANGUAGE_VERSION
+      ".*", 2000, 1000);
+      #else
       ".*", 1500, 1000);             // test function
+      #endif
   //
   // Fallback for sun has to go after the general cases above:
   //
diff --git a/test/test_bessel_y.hpp b/test/test_bessel_y.hpp
index 28361a227c..14b0be4564 100644
--- a/test/test_bessel_y.hpp
+++ b/test/test_bessel_y.hpp
@@ -9,6 +9,7 @@
 #include <boost/math/special_functions/bessel.hpp>
 #include <boost/math/tools/stats.hpp>
 #include <boost/math/tools/test.hpp>
+#include <boost/math/tools/config.hpp>
 #include <boost/math/constants/constants.hpp>
 #include <boost/type_traits/is_floating_point.hpp>
 #include <boost/array.hpp>
@@ -241,10 +242,12 @@ void test_bessel(T, const char* name)
       BOOST_CHECK_EQUAL(boost::math::sph_neumann(2, std::numeric_limits<T>::infinity()), T(0));
    }
 
+   #ifndef BOOST_MATH_NO_EXCEPTIONS
    BOOST_CHECK_THROW(boost::math::cyl_neumann(T(0), T(-1)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::cyl_neumann(T(0.2), T(-1)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::cyl_neumann(T(2), T(0)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::sph_neumann(2, T(-2)), std::domain_error);
+   #endif
 #if LDBL_MAX_EXP > 1024
    if (std::numeric_limits<T>::max_exponent > 1024)
    {
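
All of the verification loops in these tests compare device output against a host-side reference with boost::math::epsilon_difference (from <boost/math/special_functions/relative_difference.hpp>), which expresses the difference between two values in multiples of machine epsilon for the type; the .cu tests accept up to 10 epsilons and the NVRTC tests up to 300. A host-only illustration:

    // epsilon_difference(a, b) is roughly |a - b| measured in units of
    // machine epsilon for the argument type.
    #include <iostream>
    #include <limits>
    #include <boost/math/special_functions/relative_difference.hpp>

    int main()
    {
        double a = 1.0;
        double b = 1.0 + 3 * std::numeric_limits<double>::epsilon();
        // Prints approximately 3: the values differ by ~3 machine epsilons
        std::cout << boost::math::epsilon_difference(a, b) << std::endl;
    }
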
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + boost::math::policies::policy<> pol; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_y0(in[i], pol); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + + boost::math::policies::policy<> pol; + for(int i = 0; i < numElements; ++i) + { + results.push_back(boost::math::detail::bessel_y0(input_vector[i], pol)); + } + + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_y0_float.cu b/test/test_bessel_y0_float.cu new file mode 100644 index 0000000000..c8deada7d7 --- /dev/null +++ b/test/test_bessel_y0_float.cu @@ -0,0 +1,106 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + boost::math::policies::policy<> pol; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_y0(in[i], pol); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + + boost::math::policies::policy<> pol; + for(int i = 0; i < numElements; ++i) + { + results.push_back(boost::math::detail::bessel_y0(input_vector[i], pol)); + } + + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_y0_nvrtc_double.cpp b/test/test_bessel_y0_nvrtc_double.cpp new file mode 100644 index 0000000000..8645a0fdd0 --- /dev/null +++ b/test/test_bessel_y0_nvrtc_double.cpp @@ -0,0 +1,194 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +#include +extern "C" __global__ +void test_bessel_k0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + boost::math::policies::policy<> pol; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_y0(in1[i], pol); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k0_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_k0_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k0_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + boost::math::policies::policy<> pol; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_y0(h_in1[i], pol); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_y0_nvrtc_float.cpp b/test/test_bessel_y0_nvrtc_float.cpp new file mode 100644 index 0000000000..75a065bd6c --- /dev/null +++ b/test/test_bessel_y0_nvrtc_float.cpp @@ -0,0 +1,194 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +#include +extern "C" __global__ +void test_bessel_k0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + boost::math::policies::policy<> pol; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_y0(in1[i], pol); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k0_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_bessel_k0_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k0_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + boost::math::policies::policy<> pol; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::detail::bessel_y0(h_in1[i], pol); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_bessel_y1_double.cu b/test/test_bessel_y1_double.cu new file mode 100644 index 0000000000..a5b3051b40 --- /dev/null +++ b/test/test_bessel_y1_double.cu @@ -0,0 +1,106 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + boost::math::policies::policy<> pol; + if (i < numElements) + { + out[i] = boost::math::detail::bessel_y1(in[i], pol); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + + boost::math::policies::policy<> pol; + for(int i = 0; i < numElements; ++i) + { + results.push_back(boost::math::detail::bessel_y1(input_vector[i], pol)); + } + + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_bessel_y1_float.cu b/test/test_bessel_y1_float.cu new file mode 100644 index 0000000000..532aaf328d --- /dev/null +++ b/test/test_bessel_y1_float.cu @@ -0,0 +1,106 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   boost::math::policies::policy<> pol;
+   if (i < numElements)
+   {
+      out[i] = boost::math::detail::bessel_y1(in[i], pol);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector[i] = rand()/(float_type)RAND_MAX;
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 1024;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+
+   std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+
+   boost::math::policies::policy<> pol;
+   for(int i = 0; i < numElements; ++i)
+   {
+      results.push_back(boost::math::detail::bessel_y1(input_vector[i], pol));
+   }
+
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+
+   return 0;
+}
diff --git a/test/test_bessel_y1_nvrtc_double.cpp b/test/test_bessel_y1_nvrtc_double.cpp
new file mode 100644
index 0000000000..383d879eb1
--- /dev/null
+++ b/test/test_bessel_y1_nvrtc_double.cpp
@@ -0,0 +1,194 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/policies/policy.hpp>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_k0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   boost::math::policies::policy<> pol;
+   if (i < numElements)
+   {
+      out[i] = boost::math::detail::bessel_y1(in1[i], pol);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k0_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_bessel_k0_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k0_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      boost::math::policies::policy<> pol;
+      for (int i = 0; i < numElements; ++i)
+      {
+         const auto res = boost::math::detail::bessel_y1(h_in1[i], pol);
+
+         if (std::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
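The launch configuration used by all of these NVRTC harnesses is the standard ceiling-division idiom, so a full grid is launched even when numElements is not a multiple of blockSize, and the `i < numElements` guard inside the kernel discards the overhang threads. Worked through with the values above:

    // numElements = 5000, blockSize = 256
    int numBlocks = (5000 + 256 - 1) / 256;  // = 5255 / 256 = 20 (integer division)
    // 20 blocks * 256 threads = 5120 threads; threads 5000..5119 fail the
    // bounds check and return without touching out[]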
diff --git a/test/test_bessel_y1_nvrtc_float.cpp b/test/test_bessel_y1_nvrtc_float.cpp
new file mode 100644
index 0000000000..c2c1355e64
--- /dev/null
+++ b/test/test_bessel_y1_nvrtc_float.cpp
@@ -0,0 +1,194 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/policies/policy.hpp>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_bessel_k0_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   boost::math::policies::policy<> pol;
+   if (i < numElements)
+   {
+      out[i] = boost::math::detail::bessel_y1(in1[i], pol);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_bessel_k0_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_bessel_k0_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_bessel_k0_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      boost::math::policies::policy<> pol;
+      for (int i = 0; i < numElements; ++i)
+      {
+         const auto res = boost::math::detail::bessel_y1(h_in1[i], pol);
+
+         if (std::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
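The float and double variants of each NVRTC test are otherwise identical: only the two `float_type` typedefs (host side and inside the kernel string) change. That works because the `> 300` tolerance is expressed in units of machine epsilon via boost::math::epsilon_difference, so it scales automatically with the precision of the type under test. For intuition only -- the real implementation in boost/math/special_functions/relative_difference.hpp also handles zeros, infinities and denormals, which this sketch does not:

    #include <cmath>
    #include <limits>
    #include <algorithm>

    // rough sketch: relative difference rescaled to multiples of epsilon;
    // dividing by the smaller magnitude makes the reported error the
    // conservative (larger) one
    template <class T>
    T epsilon_difference_sketch(T a, T b)
    {
       T rel = std::fabs(a - b) / (std::min)(std::fabs(a), std::fabs(b));
       return rel / std::numeric_limits<T>::epsilon();
    }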
diff --git a/test/test_beta.cpp b/test/test_beta.cpp
index b24cb32c07..4e27b71353 100644
--- a/test/test_beta.cpp
+++ b/test/test_beta.cpp
@@ -5,7 +5,17 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#include "pch_light.hpp"
+#ifndef SYCL_LANGUAGE_VERSION
+#include <pch_light.hpp>
+#endif
+
+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Woverflow"
+#endif
 
 #include "test_beta.hpp"
diff --git a/test/test_beta.hpp b/test/test_beta.hpp
index 63a07d18f5..362bf51bf2 100644
--- a/test/test_beta.hpp
+++ b/test/test_beta.hpp
@@ -10,13 +10,18 @@
 #  pragma warning (disable : 4180) // qualifier applied to function type has no meaning; ignored
 #endif
 
+#ifdef __CUDACC__
+#pragma nv_diag_suppress 221
+#endif
+
 #include <boost/math/concepts/real_concept.hpp>
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/beta.hpp>
 #include <boost/math/constants/constants.hpp>
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/tools/stats.hpp>
 #include <boost/array.hpp>
 #include <iostream>
@@ -100,14 +105,17 @@ void test_spots(T)
    BOOST_CHECK_CLOSE(::boost::math::beta(small, static_cast<T>(4)), 1/small, tolerance);
    BOOST_CHECK_CLOSE(::boost::math::beta(small, small / 2), boost::math::tgamma(small) * boost::math::tgamma(small / 2) / boost::math::tgamma(small + small / 2), tolerance);
    BOOST_CHECK_CLOSE(::boost::math::beta(static_cast<T>(4), static_cast<T>(20)), static_cast<T>(0.00002823263692828910220214568040654997176736L), tolerance);
-   if ((std::numeric_limits<T>::digits < 100) && (std::numeric_limits<T>::digits != 0))
+   if (boost::math::tools::digits<T>() < 100)
    {
       // Inexact input, so disable for ultra precise long doubles:
       BOOST_CHECK_CLOSE(::boost::math::beta(static_cast<T>(0.0125L), static_cast<T>(0.000023L)), static_cast<T>(43558.24045647538375006349016083320744662L), tolerance * 2);
    }
+
+   #ifndef BOOST_MATH_NO_EXCEPTIONS
    BOOST_CHECK_THROW(boost::math::beta(static_cast<T>(0), static_cast<T>(1)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::beta(static_cast<T>(-1), static_cast<T>(1)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::beta(static_cast<T>(1), static_cast<T>(-1)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::beta(static_cast<T>(1), static_cast<T>(0)), std::domain_error);
+   #endif
 }
diff --git a/test/test_beta_dist.cpp b/test/test_beta_dist.cpp
index 943718a39f..1652309eb7 100644
--- a/test/test_beta_dist.cpp
+++ b/test/test_beta_dist.cpp
@@ -32,9 +32,14 @@
 #  pragma warning (disable : 4224) // nonstandard extension used : formal parameter 'arg' was previously defined as a type.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 using ::boost::math::concepts::real_concept;
-#include <boost/math/tools/test.hpp>
+#endif
+
+#include "../include_private/boost/math/tools/test.hpp"
 
 #include <boost/math/distributions/beta.hpp> // for beta_distribution
 using boost::math::beta_distribution;
@@ -634,12 +639,13 @@ BOOST_AUTO_TEST_CASE( test_main )
   BOOST_CHECK_CLOSE_FRACTION(mybeta22.find_alpha(mybeta22.beta(), 0.8, cdf(mybeta22, 0.8)), mybeta22.alpha(), tol);
   BOOST_CHECK_CLOSE_FRACTION(mybeta22.find_beta(mybeta22.alpha(), 0.8, cdf(mybeta22, 0.8)), mybeta22.beta(), tol);
 
-
+  #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
   beta_distribution<real_concept> rcbeta22(2, 2); // Using RealType real_concept.
   cout << "numeric_limits<real_concept>::is_specialized " << numeric_limits<real_concept>::is_specialized << endl;
   cout << "numeric_limits<real_concept>::digits " << numeric_limits<real_concept>::digits << endl;
   cout << "numeric_limits<real_concept>::digits10 " << numeric_limits<real_concept>::digits10 << endl;
   cout << "numeric_limits<real_concept>::epsilon " << numeric_limits<real_concept>::epsilon() << endl;
+  #endif
 
   // (Parameter value, arbitrarily zero, only communicates the floating point type).
   test_spots(0.0F); // Test float.
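The `numeric_limits<T>::digits < 100 && digits != 0` test above is replaced with boost::math::tools::digits<T>() because the latter also gives a sensible answer for types with no numeric_limits specialization (such as concepts::real_concept, for which `digits` is 0), by falling back on the precision configured in the policy framework. Compare:

    #include <boost/math/tools/precision.hpp>

    // 53 for double; for a type like real_concept this reports the precision
    // Boost.Math actually evaluates at, rather than numeric_limits' unspecialized 0
    int working_bits = boost::math::tools::digits<double>();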
diff --git a/test/test_beta_dist_cdf_double.cu b/test/test_beta_dist_cdf_double.cu
new file mode 100644
index 0000000000..9188f4305f
--- /dev/null
+++ b/test/test_beta_dist_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   if (i < numElements)
+   {
+      out[i] = cdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   try {
+
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector1(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   boost::random::mt19937 gen;
+   boost::random::uniform_real_distribution<float_type> dist;
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector1[i] = dist(gen);
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 256;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+   std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+   for(int i = 0; i < numElements; ++i)
+      results.push_back(cdf(boost::math::beta_distribution<float_type>(), input_vector1[i]));
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+   }
+   return 0;
+}
\ No newline at end of file
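cuda_managed_ptr.hpp and stopwatch.hpp are pre-existing test-support headers, not part of this diff. Judging by how they are used (the host loop fills a buffer the kernel then reads, with no explicit cudaMemcpy anywhere), cuda_managed_ptr<T> is presumably a small RAII owner of CUDA unified memory, roughly along these hypothetical lines:

    #include <cuda_runtime.h>
    #include <cstddef>

    // hypothetical sketch only -- the real header is not shown in this patch
    template <class T>
    class cuda_managed_ptr_sketch
    {
       T* ptr_ {};
    public:
       // Unified memory: one allocation visible to both host and device
       explicit cuda_managed_ptr_sketch(std::size_t n) { cudaMallocManaged(&ptr_, n * sizeof(T)); }
       ~cuda_managed_ptr_sketch() { cudaFree(ptr_); }
       T* get() const { return ptr_; }
       T& operator[](std::size_t i) const { return ptr_[i]; }
    };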
diff --git a/test/test_beta_dist_cdf_float.cu b/test/test_beta_dist_cdf_float.cu
new file mode 100644
index 0000000000..0278f64155
--- /dev/null
+++ b/test/test_beta_dist_cdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   if (i < numElements)
+   {
+      out[i] = cdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   try {
+
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector1(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   boost::random::mt19937 gen;
+   boost::random::uniform_real_distribution<float_type> dist;
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector1[i] = dist(gen);
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 256;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+   std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+   for(int i = 0; i < numElements; ++i)
+      results.push_back(cdf(boost::math::beta_distribution<float_type>(), input_vector1[i]));
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+   }
+   return 0;
+}
\ No newline at end of file
diff --git a/test/test_beta_dist_cdf_nvrtc_double.cpp b/test/test_beta_dist_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..4f5913c108
--- /dev/null
+++ b/test/test_beta_dist_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/beta.hpp>
+extern "C" __global__
+void test_beta_dist_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   if (i < numElements)
+   {
+      out[i] = cdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_dist_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_beta_dist_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_dist_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      for (int i = 0; i < numElements; ++i)
+      {
+         auto res = cdf(boost::math::beta_distribution<float_type>(), h_in1[i]);
+
+         if (boost::math::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
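Every *_nvrtc_* harness keeps a single four-argument kernel shape, even for unary functions: the second pointer parameter is unnamed and ignored, but h_in2/d_in2 are still allocated and copied so that the same marshalling code works for every test.

    // shared launcher signature; one-argument functions such as bessel_y1
    // or a distribution's cdf simply ignore the unnamed second input
    extern "C" __global__
    void test_kernel(const float_type* in1, const float_type* /*unused*/, float_type* out, int numElements);

The cost is one redundant 5000-element allocation and copy per test, traded for boilerplate that stays identical across files.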
diff --git a/test/test_beta_dist_cdf_nvrtc_float.cpp b/test/test_beta_dist_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..f5b031c5a9
--- /dev/null
+++ b/test/test_beta_dist_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/beta.hpp>
+extern "C" __global__
+void test_beta_dist_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   if (i < numElements)
+   {
+      out[i] = cdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_dist_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_beta_dist_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_dist_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      for (int i = 0; i < numElements; ++i)
+      {
+         auto res = cdf(boost::math::beta_distribution<float_type>(), h_in1[i]);
+
+         if (boost::math::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
diff --git a/test/test_beta_dist_pdf_double.cu b/test/test_beta_dist_pdf_double.cu
new file mode 100644
index 0000000000..e86cf94dd8
--- /dev/null
+++ b/test/test_beta_dist_pdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   if (i < numElements)
+   {
+      out[i] = pdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   try {
+
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector1(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   boost::random::mt19937 gen;
+   boost::random::uniform_real_distribution<float_type> dist;
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector1[i] = dist(gen);
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 256;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+   std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+   for(int i = 0; i < numElements; ++i)
+      results.push_back(pdf(boost::math::beta_distribution<float_type>(), input_vector1[i]));
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+   }
+   return 0;
+}
\ No newline at end of file
diff --git a/test/test_beta_dist_pdf_float.cu b/test/test_beta_dist_pdf_float.cu
new file mode 100644
index 0000000000..97dd606f2f
--- /dev/null
+++ b/test/test_beta_dist_pdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   if (i < numElements)
+   {
+      out[i] = pdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   try {
+
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector1(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   boost::random::mt19937 gen;
+   boost::random::uniform_real_distribution<float_type> dist;
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector1[i] = dist(gen);
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 256;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+   std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+   for(int i = 0; i < numElements; ++i)
+      results.push_back(pdf(boost::math::beta_distribution<float_type>(), input_vector1[i]));
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+   }
+   return 0;
+}
\ No newline at end of file
diff --git a/test/test_beta_dist_pdf_nvrtc_double.cpp b/test/test_beta_dist_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..c9870e2ce4
--- /dev/null
+++ b/test/test_beta_dist_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/beta.hpp>
+extern "C" __global__
+void test_beta_dist_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   if (i < numElements)
+   {
+      out[i] = pdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_dist_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_beta_dist_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_dist_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      for (int i = 0; i < numElements; ++i)
+      {
+         auto res = pdf(boost::math::beta_distribution<float_type>(), h_in1[i]);
+
+         if (boost::math::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
diff --git a/test/test_beta_dist_pdf_nvrtc_float.cpp b/test/test_beta_dist_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..0b4fd83488
--- /dev/null
+++ b/test/test_beta_dist_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/beta.hpp>
+extern "C" __global__
+void test_beta_dist_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   if (i < numElements)
+   {
+      out[i] = pdf(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_dist_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_beta_dist_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_dist_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      for (int i = 0; i < numElements; ++i)
+      {
+         auto res = pdf(boost::math::beta_distribution<float_type>(), h_in1[i]);
+
+         if (boost::math::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
diff --git a/test/test_beta_dist_quan_double.cu b/test/test_beta_dist_quan_double.cu
new file mode 100644
index 0000000000..a6b842e8ef
--- /dev/null
+++ b/test/test_beta_dist_quan_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   if (i < numElements)
+   {
+      out[i] = quantile(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   try {
+
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector1(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   boost::random::mt19937 gen;
+   boost::random::uniform_real_distribution<float_type> dist;
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector1[i] = dist(gen);
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 256;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+   std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+   for(int i = 0; i < numElements; ++i)
+      results.push_back(quantile(boost::math::beta_distribution<float_type>(), input_vector1[i]));
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+   }
+   return 0;
+}
\ No newline at end of file
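Since quantile is the inverse of the CDF, uniform [0,1) variates are exactly the right inputs for the quantile kernels. A quick host-side round-trip identity, purely illustrative and not part of the test files:

    #include <boost/math/distributions/beta.hpp>

    boost::math::beta_distribution<double> d;  // default alpha = beta = 1
    double x  = 0.25;
    double rt = quantile(d, cdf(d, x));        // rt == 0.25 up to rounding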
diff --git a/test/test_beta_dist_quan_float.cu b/test/test_beta_dist_quan_float.cu
new file mode 100644
index 0000000000..48a860f4c2
--- /dev/null
+++ b/test/test_beta_dist_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+   using std::cos;
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+   if (i < numElements)
+   {
+      out[i] = quantile(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+   try {
+
+   // Error code to check return values for CUDA calls
+   cudaError_t err = cudaSuccess;
+
+   // Print the vector length to be used, and compute its size
+   int numElements = 50000;
+   std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+   // Allocate the managed input vector A
+   cuda_managed_ptr<float_type> input_vector1(numElements);
+
+   // Allocate the managed output vector C
+   cuda_managed_ptr<float_type> output_vector(numElements);
+
+   boost::random::mt19937 gen;
+   boost::random::uniform_real_distribution<float_type> dist;
+   // Initialize the input vectors
+   for (int i = 0; i < numElements; ++i)
+   {
+      input_vector1[i] = dist(gen);
+   }
+
+   // Launch the test CUDA kernel
+   int threadsPerBlock = 256;
+   int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+   std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+   watch w;
+   cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+   cudaDeviceSynchronize();
+   std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+   err = cudaGetLastError();
+   if (err != cudaSuccess)
+   {
+      std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+      return EXIT_FAILURE;
+   }
+
+   // Verify that the result vector is correct
+   std::vector<float_type> results;
+   results.reserve(numElements);
+   w.reset();
+   for(int i = 0; i < numElements; ++i)
+      results.push_back(quantile(boost::math::beta_distribution<float_type>(), input_vector1[i]));
+   double t = w.elapsed();
+   // check the results
+   for(int i = 0; i < numElements; ++i)
+   {
+      if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+      {
+         std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+         std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+         return EXIT_FAILURE;
+      }
+   }
+
+   std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+   std::cout << "Done\n";
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+   }
+   return 0;
+}
\ No newline at end of file
diff --git a/test/test_beta_dist_quan_nvrtc_double.cpp b/test/test_beta_dist_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..9726bf019e
--- /dev/null
+++ b/test/test_beta_dist_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/beta.hpp>
+extern "C" __global__
+void test_beta_dist_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+   int i = blockDim.x * blockIdx.x + threadIdx.x;
+   if (i < numElements)
+   {
+      out[i] = quantile(boost::math::beta_distribution<float_type>(), in1[i]);
+   }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+   if (result != cudaSuccess)
+   {
+      std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+   if (result != CUDA_SUCCESS)
+   {
+      const char* errorStr;
+      cuGetErrorString(result, &errorStr);
+      std::cerr << msg << ": " << errorStr << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+   if (result != NVRTC_SUCCESS)
+   {
+      std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+      exit(EXIT_FAILURE);
+   }
+}
+
+int main()
+{
+   try
+   {
+      // Initialize CUDA driver API
+      checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+      // Create CUDA context
+      CUcontext context;
+      CUdevice device;
+      checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+      checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+      nvrtcProgram prog;
+      nvrtcResult res;
+
+      res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_dist_kernel.cu", 0, nullptr, nullptr);
+      checkNVRTCError(res, "Failed to create NVRTC program");
+
+      nvrtcAddNameExpression(prog, "test_beta_dist_kernel");
+
+      #ifdef BOOST_MATH_NVRTC_CI_RUN
+      const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #else
+      const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+      #endif
+
+      // Compile the program
+      res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+      if (res != NVRTC_SUCCESS)
+      {
+         size_t log_size;
+         nvrtcGetProgramLogSize(prog, &log_size);
+         char* log = new char[log_size];
+         nvrtcGetProgramLog(prog, log);
+         std::cerr << "Compilation failed:\n" << log << std::endl;
+         delete[] log;
+         exit(EXIT_FAILURE);
+      }
+
+      // Get PTX from the program
+      size_t ptx_size;
+      nvrtcGetPTXSize(prog, &ptx_size);
+      char* ptx = new char[ptx_size];
+      nvrtcGetPTX(prog, ptx);
+
+      // Load PTX into CUDA module
+      CUmodule module;
+      CUfunction kernel;
+      checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+      checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_dist_kernel"), "Failed to get kernel function");
+
+      int numElements = 5000;
+      float_type *h_in1, *h_in2, *h_out;
+      float_type *d_in1, *d_in2, *d_out;
+
+      // Allocate memory on the host
+      h_in1 = new float_type[numElements];
+      h_in2 = new float_type[numElements];
+      h_out = new float_type[numElements];
+
+      // Initialize input arrays
+      std::mt19937_64 rng(42);
+      std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+      for (int i = 0; i < numElements; ++i)
+      {
+         h_in1[i] = static_cast<float_type>(dist(rng));
+         h_in2[i] = static_cast<float_type>(dist(rng));
+      }
+
+      checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+      checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+      checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+      checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+      checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+      int blockSize = 256;
+      int numBlocks = (numElements + blockSize - 1) / blockSize;
+      void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+      checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+      checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+      // Verify Result
+      for (int i = 0; i < numElements; ++i)
+      {
+         auto res = quantile(boost::math::beta_distribution<float_type>(), h_in1[i]);
+
+         if (boost::math::isfinite(res))
+         {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+               std::cout << "error at line: " << i
+                         << "\nParallel: " << h_out[i]
+                         << "\n  Serial: " << res
+                         << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+         }
+      }
+
+      cudaFree(d_in1);
+      cudaFree(d_in2);
+      cudaFree(d_out);
+      delete[] h_in1;
+      delete[] h_in2;
+      delete[] h_out;
+
+      nvrtcDestroyProgram(&prog);
+      delete[] ptx;
+
+      cuCtxDestroy(context);
+
+      std::cout << "Kernel executed successfully." << std::endl;
+      return 0;
+   }
+   catch(const std::exception& e)
+   {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+      return EXIT_FAILURE;
+   }
+}
diff --git a/test/test_beta_dist_quan_nvrtc_float.cpp b/test/test_beta_dist_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..d2476cb2ac
--- /dev/null
+++ b/test/test_beta_dist_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_beta_dist_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::beta_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_dist_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_beta_dist_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_dist_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::beta_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_beta_double.cu b/test/test_beta_double.cu new file mode 100644 index 0000000000..cd58601584 --- /dev/null +++ b/test/test_beta_double.cu @@ -0,0 +1,132 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type * in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::beta(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "beta_med_data.ipp" +#include "beta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < beta_med_data.size(); ++i) + { + v1.push_back(beta_med_data[i][0]); + v2.push_back(beta_med_data[i][1]); + } + for(unsigned i = 0; i < beta_small_data.size(); ++i) + { + v1.push_back(beta_small_data[i][0]); + v2.push_back(beta_small_data[i][1]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::beta(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_beta_float.cu b/test/test_beta_float.cu new file mode 100644 index 0000000000..c4c078f373 --- /dev/null +++ b/test/test_beta_float.cu @@ -0,0 +1,130 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type * in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::beta(in1[i], in2[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "beta_med_data.ipp" +#include "beta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2; + + for(unsigned i = 0; i < beta_med_data.size(); ++i) + { + v1.push_back(beta_med_data[i][0]); + v2.push_back(beta_med_data[i][1]); + } + for(unsigned i = 0; i < beta_small_data.size(); ++i) + { + v1.push_back(beta_small_data[i][0]); + v2.push_back(beta_small_data[i][1]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::beta(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + diff --git a/test/test_beta_nvrtc_double.cpp b/test/test_beta_nvrtc_double.cpp new file mode 100644 index 0000000000..fdc502a195 --- /dev/null +++ b/test/test_beta_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_beta_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::beta(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", 
"-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::beta(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_beta_nvrtc_float.cpp b/test/test_beta_nvrtc_float.cpp new file mode 100644 index 0000000000..d403d33155 --- /dev/null +++ b/test/test_beta_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_beta_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::beta(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); 
+ } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::beta(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_beta_simple.cpp b/test/test_beta_simple.cpp new file mode 100644 index 0000000000..436b14ab46 --- /dev/null +++ b/test/test_beta_simple.cpp @@ -0,0 +1,38 @@ +// Copyright John Maddock 2006. +// Copyright Paul A. Bristow 2007, 2009 +// Copyright Matt Borland 2024 +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include "math_unit_test.hpp" + +template +void test_spots(T) +{ + // + // Basic sanity checks, tolerance is 20 epsilon expressed as a percentage: + // + T tolerance = 20; + T small = boost::math::tools::epsilon() / 1024; + CHECK_ULP_CLOSE(::boost::math::beta(static_cast(1), static_cast(1)), static_cast(1), tolerance); + CHECK_ULP_CLOSE(::boost::math::beta(static_cast(1), static_cast(4)), static_cast(0.25), tolerance); + CHECK_ULP_CLOSE(::boost::math::beta(static_cast(4), static_cast(1)), static_cast(0.25), tolerance); + CHECK_ULP_CLOSE(::boost::math::beta(small, static_cast(4)), 1/small, tolerance); + CHECK_ULP_CLOSE(::boost::math::beta(static_cast(4), small), 1/small, tolerance); + CHECK_ULP_CLOSE(::boost::math::beta(small, static_cast(4)), 1/small, tolerance); + CHECK_ULP_CLOSE(::boost::math::beta(static_cast(4), static_cast(20)), static_cast(0.00002823263692828910220214568040654997176736L), tolerance); +} + +int main() +{ + test_spots(0.0F); + test_spots(0.0); + + return boost::math::test::report_errors(); +} diff --git a/test/test_betac_double.cu b/test/test_betac_double.cu new file mode 100644 index 0000000000..8bb31d3219 --- /dev/null +++ b/test/test_betac_double.cu @@ -0,0 +1,146 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::betac(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "beta_med_data.ipp" +#include "beta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < beta_med_data.size(); ++i) + { + v1.push_back(beta_med_data[i][0]); + v2.push_back(beta_med_data[i][1]); + v3.push_back(beta_med_data[i][2]); + } + for(unsigned i = 0; i < beta_small_data.size(); ++i) + { + v1.push_back(beta_small_data[i][0]); + v2.push_back(beta_small_data[i][1]); + v3.push_back(beta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the 
input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::betac(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::isfinite(output_vector[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_betac_float.cu b/test/test_betac_float.cu new file mode 100644 index 0000000000..7070c567cc --- /dev/null +++ b/test/test_betac_float.cu @@ -0,0 +1,146 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::betac(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "beta_med_data.ipp" +#include "beta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < beta_med_data.size(); ++i) + { + v1.push_back(beta_med_data[i][0]); + v2.push_back(beta_med_data[i][1]); + v3.push_back(beta_med_data[i][2]); + } + for(unsigned i = 0; i < beta_small_data.size(); ++i) + { + v1.push_back(beta_small_data[i][0]); + v2.push_back(beta_small_data[i][1]); + v3.push_back(beta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::betac(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::isfinite(output_vector[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_betac_nvrtc_double.cpp b/test/test_betac_nvrtc_double.cpp new file mode 100644 index 0000000000..0667cfe0d4 --- /dev/null +++ b/test/test_betac_nvrtc_double.cpp @@ -0,0 +1,196 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_beta_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::betac(in1[i], in2[i], in3[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t 
ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + h_in3[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::betac(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_betac_nvrtc_float.cpp b/test/test_betac_nvrtc_float.cpp new file mode 100644 index 0000000000..0667cfe0d4 --- /dev/null +++ b/test/test_betac_nvrtc_float.cpp @@ -0,0 +1,196 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_beta_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::betac(in1[i], in2[i], in3[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + h_in3[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::betac(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_binomial.cpp b/test/test_binomial.cpp index 8aec49e4f8..ef7f171723 100644 --- a/test/test_binomial.cpp +++ b/test/test_binomial.cpp @@ -27,7 +27,6 @@ // Enable C++ Exceptions Yes With SEH Exceptions (/EHa) prevents warning 4535. 
#endif -#include #include // for real_concept using ::boost::math::concepts::real_concept; @@ -522,57 +521,57 @@ void test_spots(RealType T) binomial_distribution(static_cast(0), static_cast(0.25)), static_cast(0)), static_cast(1) ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( binomial_distribution(static_cast(-1), static_cast(0.25)), static_cast(0)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( binomial_distribution(static_cast(8), static_cast(-0.25)), static_cast(0)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( binomial_distribution(static_cast(8), static_cast(1.25)), static_cast(0)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( binomial_distribution(static_cast(8), static_cast(0.25)), static_cast(-1)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( pdf( binomial_distribution(static_cast(8), static_cast(0.25)), static_cast(9)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( cdf( binomial_distribution(static_cast(8), static_cast(0.25)), static_cast(-1)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( cdf( binomial_distribution(static_cast(8), static_cast(0.25)), static_cast(9)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( cdf( binomial_distribution(static_cast(8), static_cast(-0.25)), static_cast(0)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( cdf( binomial_distribution(static_cast(8), static_cast(1.25)), static_cast(0)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile( binomial_distribution(static_cast(8), static_cast(-0.25)), static_cast(0)), std::domain_error ); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile( binomial_distribution(static_cast(8), static_cast(1.25)), static_cast(0)), std::domain_error diff --git a/test/test_cauchy.cpp b/test/test_cauchy.cpp index 002690e5f8..f5893264a6 100644 --- a/test/test_cauchy.cpp +++ b/test/test_cauchy.cpp @@ -18,17 +18,16 @@ // #define BOOST_MATH_ASSERT_UNDEFINED_POLICY false // To compile even if Cauchy mean is used. -#include #include // for real_concept #include using boost::math::cauchy_distribution; -#include "test_out_of_range.hpp" - #define BOOST_TEST_MAIN #include // Boost.Test #include +#include "test_out_of_range.hpp" + #include using std::cout; using std::endl; @@ -38,11 +37,11 @@ void test_spots(RealType T) { // Check some bad parameters to construct the distribution, #ifndef BOOST_NO_EXCEPTIONS - BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution nbad1(0, 0), std::domain_error); // zero scale. - BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution nbad1(0, -1), std::domain_error); // negative scale (shape). + BOOST_CHECK_THROW(boost::math::cauchy_distribution nbad1(0, 0), std::domain_error); // zero scale. + BOOST_CHECK_THROW(boost::math::cauchy_distribution nbad1(0, -1), std::domain_error); // negative scale (shape). #else - BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution(0, 0), std::domain_error); // zero scale. - BOOST_MATH_CHECK_THROW(boost::math::cauchy_distribution(0, -1), std::domain_error); // negative scale (shape). + BOOST_CHECK_THROW(boost::math::cauchy_distribution(0, 0), std::domain_error); // zero scale. + BOOST_CHECK_THROW(boost::math::cauchy_distribution(0, -1), std::domain_error); // negative scale (shape). #endif cauchy_distribution C01; @@ -667,35 +666,35 @@ void test_spots(RealType T) // To compile even if Cauchy mean is used. 
// See policy reference, mathematically undefined function policies // - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // mean(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // variance(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // standard_deviation(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // kurtosis(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // kurtosis_excess(dist), // std::domain_error); - //BOOST_MATH_CHECK_THROW( + //BOOST_CHECK_THROW( // skewness(dist), // std::domain_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile(dist, RealType(0.0)), std::overflow_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile(dist, RealType(1.0)), std::overflow_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile(complement(dist, RealType(0.0))), std::overflow_error); - BOOST_MATH_CHECK_THROW( + BOOST_CHECK_THROW( quantile(complement(dist, RealType(1.0))), std::overflow_error); @@ -705,7 +704,7 @@ void test_spots(RealType T) } // template void test_spots(RealType) -BOOST_AUTO_TEST_CASE( test_main ) +BOOST_AUTO_TEST_CASE(test_main) { BOOST_MATH_CONTROL_FP; // Check that can generate cauchy distribution using the two convenience methods: diff --git a/test/test_cauchy_cdf_double.cu b/test/test_cauchy_cdf_double.cu new file mode 100644 index 0000000000..526744ba1f --- /dev/null +++ b/test/test_cauchy_cdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != 
cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::cauchy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_cauchy_cdf_float.cu b/test/test_cauchy_cdf_float.cu new file mode 100644 index 0000000000..526744ba1f --- /dev/null +++ b/test/test_cauchy_cdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::cauchy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(-10000, 10000); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(cdf(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_cdf_nvrtc_double.cpp b/test/test_cauchy_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..f9b16b6e0e
--- /dev/null
+++ b/test/test_cauchy_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/cauchy.hpp>
+extern "C" __global__
+void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cauchy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
"--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::cauchy_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cauchy_cdf_nvrtc_float.cpp b/test/test_cauchy_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..0870900ae4 --- /dev/null +++ b/test/test_cauchy_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. 
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/cauchy.hpp>
+extern "C" __global__
+void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cauchy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::cauchy_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cauchy_pdf_double.cu b/test/test_cauchy_pdf_double.cu
new file mode 100644
index 0000000000..62398c31ed
--- /dev/null
+++ b/test/test_cauchy_pdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(pdf(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_pdf_float.cu b/test/test_cauchy_pdf_float.cu
new file mode 100644
index 0000000000..aff3369b83
--- /dev/null
+++ b/test/test_cauchy_pdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(pdf(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_pdf_nvrtc_double.cpp b/test/test_cauchy_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..24e2808c93
--- /dev/null
+++ b/test/test_cauchy_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/cauchy.hpp>
+extern "C" __global__
+void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cauchy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::cauchy_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cauchy_pdf_nvrtc_float.cpp b/test/test_cauchy_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..605d257831
--- /dev/null
+++ b/test/test_cauchy_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/cauchy.hpp>
+extern "C" __global__
+void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cauchy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::cauchy_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cauchy_quan_double.cu b/test/test_cauchy_quan_double.cu
new file mode 100644
index 0000000000..0fcaaafe7c
--- /dev/null
+++ b/test/test_cauchy_quan_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(quantile(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_quan_float.cu b/test/test_cauchy_quan_float.cu
new file mode 100644
index 0000000000..9c04c5b12a
--- /dev/null
+++ b/test/test_cauchy_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(quantile(boost::math::cauchy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_cauchy_quan_nvrtc_double.cpp b/test/test_cauchy_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..08fa01c4f9
--- /dev/null
+++ b/test/test_cauchy_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/cauchy.hpp>
+extern "C" __global__
+void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cauchy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::cauchy_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cauchy_quan_nvrtc_float.cpp b/test/test_cauchy_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..6d63d1c2d6
--- /dev/null
+++ b/test/test_cauchy_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/cauchy.hpp>
+extern "C" __global__
+void test_cauchy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::cauchy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cauchy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cauchy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cauchy_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::cauchy_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cauchy_range_support_double.cu b/test/test_cauchy_range_support_double.cu
new file mode 100644
index 0000000000..3a42c1bd30
--- /dev/null
+++ b/test/test_cauchy_range_support_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type* in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = support(boost::math::cauchy_distribution<float_type>(in1[i])).second;
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(support(boost::math::cauchy_distribution<float_type>(input_vector1[i])).second);
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_cauchy_range_support_float.cu b/test/test_cauchy_range_support_float.cu
new file mode 100644
index 0000000000..e713736e60
--- /dev/null
+++ b/test/test_cauchy_range_support_float.cu
@@ -0,0 +1,111 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/cauchy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type* in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = range(boost::math::cauchy_distribution<float_type>(in1[i])).first;
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(range(boost::math::cauchy_distribution<float_type>(input_vector1[i])).first);
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            std::cerr << "Device got: " << output_vector[i] << ", and serial got: " << results[i] << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_cbrt.cpp b/test/test_cbrt.cpp
index 8b36a765d0..6abb9bd885 100644
--- a/test/test_cbrt.cpp
+++ b/test/test_cbrt.cpp
@@ -9,7 +9,10 @@
 #  pragma warning (disable : 4224)
 #endif
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp> // include /libs/math/src/
+#endif
+
 #include "test_cbrt.hpp"
 #include <boost/math/tr1.hpp> // Added to avoid link failure missing cbrt variants.
diff --git a/test/test_cbrt.hpp b/test/test_cbrt.hpp
index f606a58407..77e4aed516 100644
--- a/test/test_cbrt.hpp
+++ b/test/test_cbrt.hpp
@@ -1,5 +1,6 @@
 // Copyright John Maddock 2006.
 // Copyright Paul A. Bristow 2007, 2009
+// Copyright Matt Borland 2024
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -9,7 +10,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -93,7 +93,9 @@ void test_cbrt(T, const char* name)
    }
    BOOST_IF_CONSTEXPR(std::numeric_limits<T>::has_quiet_NaN)
    {
+      #ifndef BOOST_MATH_NO_EXCEPTIONS
       BOOST_CHECK_THROW(boost::math::cbrt(std::numeric_limits<T>::quiet_NaN()), std::domain_error);
+      #endif
    }
 }
diff --git a/test/test_cbrt_double.cu b/test/test_cbrt_double.cu
new file mode 100644
index 0000000000..cc2c326850
--- /dev/null
+++ b/test/test_cbrt_double.cu
@@ -0,0 +1,99 @@
+
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/cbrt.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cbrt(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::cbrt(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cbrt_float.cu b/test/test_cbrt_float.cu
new file mode 100644
index 0000000000..a4e98cce8d
--- /dev/null
+++ b/test/test_cbrt_float.cu
@@ -0,0 +1,99 @@
+
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type* in, float_type* out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cbrt(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cbrt(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cbrt_nvrtc_double.cpp b/test/test_cbrt_nvrtc_double.cpp
new file mode 100644
index 0000000000..717d116d68
--- /dev/null
+++ b/test/test_cbrt_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/cbrt.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/cbrt.hpp>
+extern "C" __global__
+void test_cbrt_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cbrt(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cbrt_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cbrt_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cbrt_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device
memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::cbrt(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cbrt_nvrtc_float.cpp b/test/test_cbrt_nvrtc_float.cpp new file mode 100644 index 0000000000..a595cb8705 --- /dev/null +++ b/test/test_cbrt_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +extern "C" __global__ +void test_cbrt_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::cbrt(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cbrt_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cbrt_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cbrt_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory 
for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::cbrt(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_changesign_double.cu b/test/test_changesign_double.cu new file mode 100644 index 0000000000..bfb2ade1e2 --- /dev/null +++ b/test/test_changesign_double.cu @@ -0,0 +1,111 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <limits>
+#include <boost/math/special_functions/sign.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type* in, float_type* out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::changesign(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vector with finite values, zeros, and infinities
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        }
+        // negate every other element so negative inputs are exercised too
+        if (i % 2)
+            h_A[i] = -h_A[i];
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::changesign(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_changesign_float.cu b/test/test_changesign_float.cu
new file mode 100644
index 0000000000..d7e1764bdf
--- /dev/null
+++ b/test/test_changesign_float.cu
@@ -0,0 +1,111 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <limits>
+#include <boost/math/special_functions/sign.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type* in, float_type* out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::changesign(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vector with finite values, zeros, and infinities
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+        switch(i % 55)
+        {
+        case 1:
+            h_A[i] = 0;
+            break;
+        case 2:
+            h_A[i] = std::numeric_limits<float_type>::infinity();
+            break;
+        case 3:
+            h_A[i] = -std::numeric_limits<float_type>::infinity();
+            break;
+        }
+        // negate every other element so negative inputs are exercised too
+        if (i % 2)
+            h_A[i] = -h_A[i];
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::changesign(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (h_C[i] != results[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_chi_squared.cpp b/test/test_chi_squared.cpp
index cc7747a6c0..bfd4b5f3a2 100644
--- a/test/test_chi_squared.cpp
+++ b/test/test_chi_squared.cpp
@@ -16,9 +16,13 @@
 # pragma warning(disable: 4127) // conditional expression is constant
 #endif
 
-#include // for real_concept
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 using ::boost::math::concepts::real_concept;
+#endif
 
 #include <boost/math/distributions/chi_squared.hpp> // for chi_squared_distribution
 #include // for chi_squared_distribution
diff --git a/test/test_chi_squared_cdf_double.cu b/test/test_chi_squared_cdf_double.cu
new file mode 100644
index 0000000000..c2475883b9
--- /dev/null
+++ b/test/test_chi_squared_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::chi_squared_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_chi_squared_cdf_float.cu b/test/test_chi_squared_cdf_float.cu new file mode 100644 index 0000000000..07dce0d067 --- /dev/null +++ b/test/test_chi_squared_cdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::chi_squared_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_chi_squared_cdf_nvrtc_double.cpp b/test/test_chi_squared_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..0ad459fa67 --- /dev/null +++ b/test/test_chi_squared_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::chi_squared_distribution(1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_chi_squared_cdf_nvrtc_float.cpp b/test/test_chi_squared_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..1b26c5d6f2 --- /dev/null +++ b/test/test_chi_squared_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::chi_squared_distribution(1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_chi_squared_pdf_double.cu b/test/test_chi_squared_pdf_double.cu new file mode 100644 index 0000000000..30edafd050 --- /dev/null +++ b/test/test_chi_squared_pdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::chi_squared_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_chi_squared_pdf_float.cu b/test/test_chi_squared_pdf_float.cu new file mode 100644 index 0000000000..9b205182ba --- /dev/null +++ b/test/test_chi_squared_pdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::chi_squared_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_chi_squared_pdf_nvrtc_double.cpp b/test/test_chi_squared_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..18d14a4b0e --- /dev/null +++ b/test/test_chi_squared_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::chi_squared_distribution(1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_chi_squared_pdf_nvrtc_float.cpp b/test/test_chi_squared_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..754cbf7fba --- /dev/null +++ b/test/test_chi_squared_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::chi_squared_distribution(1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::chi_squared_distribution(1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_chi_squared_quan_double.cu b/test/test_chi_squared_quan_double.cu new file mode 100644 index 0000000000..3fae7d966f --- /dev/null +++ b/test/test_chi_squared_quan_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::chi_squared_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::chi_squared_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_chi_squared_quan_float.cu b/test/test_chi_squared_quan_float.cu
new file mode 100644
index 0000000000..7a717530e1
--- /dev/null
+++ b/test/test_chi_squared_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::chi_squared_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::chi_squared_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_chi_squared_quan_nvrtc_double.cpp b/test/test_chi_squared_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..69b15b6cfd
--- /dev/null
+++ b/test/test_chi_squared_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::chi_squared_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
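+        // Inputs are drawn from [0, 1), the valid probability domain for quantile()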
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::chi_squared_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_chi_squared_quan_nvrtc_float.cpp b/test/test_chi_squared_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..d6e1b2a9b5
--- /dev/null
+++ b/test/test_chi_squared_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::chi_squared_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
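+        // Same fixed-seed inputs as the double variant; only float_type changes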
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::chi_squared_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cos_pi_double.cu b/test/test_cos_pi_double.cu
new file mode 100644
index 0000000000..5a66b25ce2
--- /dev/null
+++ b/test/test_cos_pi_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/cos_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cos_pi(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cos_pi(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cos_pi_float.cu b/test/test_cos_pi_float.cu
new file mode 100644
index 0000000000..6a04d8e046
--- /dev/null
+++ b/test/test_cos_pi_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/cos_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cos_pi(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cos_pi(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cos_pi_nvrtc_double.cpp b/test/test_cos_pi_nvrtc_double.cpp
new file mode 100644
index 0000000000..459524bbed
--- /dev/null
+++ b/test/test_cos_pi_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/cos_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/cos_pi.hpp>
+extern "C" __global__
+void test_cos_pi_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cos_pi(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cos_pi_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cos_pi_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cos_pi_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
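+        // The kernel ignores its second argument; d_in2/h_in2 exist only so every test kernel shares one signature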
device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::cos_pi(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cos_pi_nvrtc_float.cpp b/test/test_cos_pi_nvrtc_float.cpp new file mode 100644 index 0000000000..2f541e217d --- /dev/null +++ b/test/test_cos_pi_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/cos_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/cos_pi.hpp>
+extern "C" __global__
+void test_cos_pi_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cos_pi(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cos_pi_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cos_pi_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cos_pi_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
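+        // Unlike the .cu tests, data moves through explicit cudaMalloc/cudaMemcpy rather than managed memory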
device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::cos_pi(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cyl_bessel_i_double.cu b/test/test_cyl_bessel_i_double.cu new file mode 100644 index 0000000000..91a3ed8ebf --- /dev/null +++ b/test/test_cyl_bessel_i_double.cu @@ -0,0 +1,104 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_i(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_bessel_i(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_bessel_i_float.cu b/test/test_cyl_bessel_i_float.cu
new file mode 100644
index 0000000000..5aad1be88b
--- /dev/null
+++ b/test/test_cyl_bessel_i_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_i(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_bessel_i(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_bessel_i_nvrtc_double.cpp b/test/test_cyl_bessel_i_nvrtc_double.cpp
new file mode 100644
index 0000000000..50bfc0c790
--- /dev/null
+++ b/test/test_cyl_bessel_i_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_cyl_bessel_i_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_i(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_i_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_bessel_i_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_i_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
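+        // Orders and arguments up to 1000 exercise the large-order and large-argument paths of cyl_bessel_i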
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_bessel_i(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_bessel_i_nvrtc_float.cpp b/test/test_cyl_bessel_i_nvrtc_float.cpp
new file mode 100644
index 0000000000..c73992a27a
--- /dev/null
+++ b/test/test_cyl_bessel_i_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_cyl_bessel_i_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_i(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_i_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_bessel_i_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_i_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
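+        // Same [0, 1000) input range as the double test, now at float precision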
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_bessel_i(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_bessel_j_double.cu b/test/test_cyl_bessel_j_double.cu
new file mode 100644
index 0000000000..b5d93f1ddb
--- /dev/null
+++ b/test/test_cyl_bessel_j_double.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_j(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_bessel_j(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_bessel_j_float.cu b/test/test_cyl_bessel_j_float.cu
new file mode 100644
index 0000000000..3edc2a7c9c
--- /dev/null
+++ b/test/test_cyl_bessel_j_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_j(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_bessel_j(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_bessel_j_nvrtc_double.cpp b/test/test_cyl_bessel_j_nvrtc_double.cpp
new file mode 100644
index 0000000000..f74e112edd
--- /dev/null
+++ b/test/test_cyl_bessel_j_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_cyl_bessel_j_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_j(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_j_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_bessel_j_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_j_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_bessel_j(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_bessel_j_nvrtc_float.cpp b/test/test_cyl_bessel_j_nvrtc_float.cpp
new file mode 100644
index 0000000000..e3d7928438
--- /dev/null
+++ b/test/test_cyl_bessel_j_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_cyl_bessel_j_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_j(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_j_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_bessel_j_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_j_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_bessel_j(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_bessel_k_double.cu b/test/test_cyl_bessel_k_double.cu
new file mode 100644
index 0000000000..3dfd2bf388
--- /dev/null
+++ b/test/test_cyl_bessel_k_double.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/bessel.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_k(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_bessel_k(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_bessel_k_float.cu b/test/test_cyl_bessel_k_float.cu
new file mode 100644
index 0000000000..b874857a05
--- /dev/null
+++ b/test/test_cyl_bessel_k_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/bessel.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_k(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_bessel_k(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_bessel_k_nvrtc_double.cpp b/test/test_cyl_bessel_k_nvrtc_double.cpp
new file mode 100644
index 0000000000..66a8b14900
--- /dev/null
+++ b/test/test_cyl_bessel_k_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_cyl_bessel_k_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_k(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_k_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_bessel_k_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_k_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_bessel_k(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_bessel_k_nvrtc_float.cpp b/test/test_cyl_bessel_k_nvrtc_float.cpp
new file mode 100644
index 0000000000..e23ff82c0d
--- /dev/null
+++ b/test/test_cyl_bessel_k_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_cyl_bessel_k_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_bessel_k(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_k_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_bessel_k_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_k_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_bessel_k(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_hankel_1_double.cu b/test/test_cyl_hankel_1_double.cu
new file mode 100644
index 0000000000..1349469341
--- /dev/null
+++ b/test/test_cyl_hankel_1_double.cu
@@ -0,0 +1,119 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/hankel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex<float_type> *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_hankel_1(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<boost::math::complex<float_type>> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<boost::math::complex<float_type>> results;
+    results.resize(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results[i] = boost::math::cyl_hankel_1(input_vector1[i], input_vector2[i]);
+    double t = w.elapsed();
+    // check the results
+    int failure_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real());
+        if (eps > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag()
+                      << "\n  Host: " << results[i].real() << ", " << results[i].imag()
+                      << "\n   Eps: " << eps << std::endl;
+            ++failure_counter;
+            if (failure_counter > 100)
+            {
+                break;
+            }
+        }
+    }
+
+    if (failure_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_hankel_1_float.cu b/test/test_cyl_hankel_1_float.cu
new file mode 100644
index 0000000000..da78c375c6
--- /dev/null
+++ b/test/test_cyl_hankel_1_float.cu
@@ -0,0 +1,119 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/hankel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex<float_type> *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_hankel_1(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<boost::math::complex<float_type>> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<boost::math::complex<float_type>> results;
+    results.resize(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results[i] = boost::math::cyl_hankel_1(input_vector1[i], input_vector2[i]);
+    double t = w.elapsed();
+    // check the results
+    int failure_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real());
+        if (eps > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag()
+                      << "\n  Host: " << results[i].real() << ", " << results[i].imag()
+                      << "\n   Eps: " << eps << std::endl;
+            ++failure_counter;
+            if (failure_counter > 100)
+            {
+                break;
+            }
+        }
+    }
+
+    if (failure_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_hankel_1_nvrtc_double.cpp b/test/test_cyl_hankel_1_nvrtc_double.cpp
new file mode 100644
index 0000000000..298436d063
--- /dev/null
+++ b/test/test_cyl_hankel_1_nvrtc_double.cpp
@@ -0,0 +1,199 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/tools/complex.hpp>
+#include <boost/math/special_functions/hankel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/hankel.hpp>
+extern "C" __global__
+void test_cyl_hankel_1_kernel(const float_type *in1, const float_type* in2, boost::math::complex<float_type> *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_hankel_1(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_hankel_1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_hankel_1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_hankel_1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2;
+        float_type *d_in1, *d_in2;
+        boost::math::complex<float_type> *h_out, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new boost::math::complex<float_type>[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex<float_type>)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex<float_type>), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        int fail_counter = 0;
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_hankel_1(h_in1[i], h_in2[i]);
+            if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag()
+                          << "\n  Serial: " << res.real() << ", " << res.imag()
+                          << "\n    Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl;
+                ++fail_counter;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        if (fail_counter > 0)
+        {
+            return 1;
+        }
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_hankel_1_nvrtc_float.cpp b/test/test_cyl_hankel_1_nvrtc_float.cpp
new file mode 100644
index 0000000000..d505c7bc4c
--- /dev/null
+++ b/test/test_cyl_hankel_1_nvrtc_float.cpp
@@ -0,0 +1,199 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/tools/complex.hpp>
+#include <boost/math/special_functions/hankel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/hankel.hpp>
+extern "C" __global__
+void test_cyl_hankel_1_kernel(const float_type *in1, const float_type* in2, boost::math::complex<float_type> *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_hankel_1(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_hankel_1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_hankel_1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_hankel_1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2;
+        float_type *d_in1, *d_in2;
+        boost::math::complex<float_type> *h_out, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new boost::math::complex<float_type>[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex<float_type>)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex<float_type>), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        int fail_counter = 0;
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_hankel_1(h_in1[i], h_in2[i]);
+            if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag()
+                          << "\n  Serial: " << res.real() << ", " << res.imag()
+                          << "\n    Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl;
+                ++fail_counter;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        if (fail_counter > 0)
+        {
+            return 1;
+        }
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_cyl_hankel_2_double.cu b/test/test_cyl_hankel_2_double.cu
new file mode 100644
index 0000000000..55b643173a
--- /dev/null
+++ b/test/test_cyl_hankel_2_double.cu
@@ -0,0 +1,119 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/hankel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex<float_type> *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_hankel_2(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<boost::math::complex<float_type>> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<boost::math::complex<float_type>> results;
+    results.resize(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results[i] = boost::math::cyl_hankel_2(input_vector1[i], input_vector2[i]);
+    double t = w.elapsed();
+    // check the results
+    int failure_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real());
+        if (eps > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag()
+                      << "\n  Host: " << results[i].real() << ", " << results[i].imag()
+                      << "\n   Eps: " << eps << std::endl;
+            ++failure_counter;
+            if (failure_counter > 100)
+            {
+                break;
+            }
+        }
+    }
+
+    if (failure_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_hankel_2_float.cu b/test/test_cyl_hankel_2_float.cu
new file mode 100644
index 0000000000..5766ebeb48
--- /dev/null
+++ b/test/test_cyl_hankel_2_float.cu
@@ -0,0 +1,119 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/hankel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex<float_type> *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_hankel_2(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<boost::math::complex<float_type>> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<boost::math::complex<float_type>> results;
+    results.resize(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results[i] = boost::math::cyl_hankel_2(input_vector1[i], input_vector2[i]);
+    double t = w.elapsed();
+    // check the results
+    int failure_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real());
+        if (eps > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag()
+                      << "\n  Host: " << results[i].real() << ", " << results[i].imag()
+                      << "\n   Eps: " << eps << std::endl;
+            ++failure_counter;
+            if (failure_counter > 100)
+            {
+                break;
+            }
+        }
+    }
+
+    if (failure_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_hankel_2_nvrtc_double.cpp b/test/test_cyl_hankel_2_nvrtc_double.cpp
new file mode 100644
index 0000000000..f7589d2016
--- /dev/null
+++ b/test/test_cyl_hankel_2_nvrtc_double.cpp
@@ -0,0 +1,199 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cyl_hankel_2_kernel(const float_type *in1, const float_type* in2, boost::math::complex *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::cyl_hankel_2(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_hankel_2_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cyl_hankel_2_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_hankel_2_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2; + float_type *d_in1, *d_in2; + boost::math::complex *h_out, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new boost::math::complex[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + int fail_counter = 0; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::cyl_hankel_2(h_in1[i], h_in2[i]); + if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag() + << "\n Serial: " << res.real() << ", " << res.imag() + << "\n Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl; + ++fail_counter; + if (fail_counter > 100) + { + break; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (fail_counter > 0) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cyl_hankel_2_nvrtc_float.cpp b/test/test_cyl_hankel_2_nvrtc_float.cpp new file mode 100644 index 0000000000..54216d39c9 --- /dev/null +++ b/test/test_cyl_hankel_2_nvrtc_float.cpp @@ -0,0 +1,199 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
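// cuLaunchKernel, used by all of the NVRTC tests, takes its kernel parameters
// as an array of pointers *to* the arguments rather than the argument values
// themselves. A sketch of that calling convention factored into a helper,
// assuming a kernel of signature (const T*, const T*, T*, int); `launch` is a
// hypothetical name, not something these tests define:

#include <cuda.h>

CUresult launch(CUfunction kernel, CUdeviceptr in1, CUdeviceptr in2,
                CUdeviceptr out, int n, int block_size)
{
    void* args[] = { &in1, &in2, &out, &n };   // addresses of the arguments
    const int grid = (n + block_size - 1) / block_size;
    return cuLaunchKernel(kernel,
                          grid, 1, 1,          // grid dimensions
                          block_size, 1, 1,    // block dimensions
                          0, nullptr,          // shared memory bytes, stream
                          args, nullptr);      // kernel parameters, extra options
}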
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_cyl_hankel_2_kernel(const float_type *in1, const float_type* in2, boost::math::complex *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::cyl_hankel_2(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_hankel_2_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cyl_hankel_2_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_hankel_2_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2; + float_type *d_in1, *d_in2; + boost::math::complex *h_out, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new boost::math::complex[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + int fail_counter = 0; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::cyl_hankel_2(h_in1[i], h_in2[i]); + if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag() + << "\n Serial: " << res.real() << ", " << res.imag() + << "\n Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl; + ++fail_counter; + if (fail_counter > 100) + { + break; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (fail_counter > 0) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cyl_neumann_double.cu b/test/test_cyl_neumann_double.cu new file mode 100644 index 0000000000..0e7a72ff98 --- /dev/null +++ b/test/test_cyl_neumann_double.cu @@ -0,0 +1,116 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
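// Each CUDA test in this diff ends with the same host-vs-device comparison
// loop, measured in units of machine epsilon via boost::math::epsilon_difference.
// A sketch of that check factored into one helper (hypothetical; the committed
// tests keep the loop inline in each file):

#include <cmath>
#include <iostream>
#include <boost/math/special_functions/relative_difference.hpp>

template <typename T>
int count_ulp_failures(const T* device, const T* host, int n, T max_eps)
{
    int failures = 0;
    for (int i = 0; i < n; ++i)
    {
        // Skip non-finite values, as the double-precision tests do.
        if (!std::isfinite(device[i]) || !std::isfinite(host[i]))
            continue;
        const T eps = boost::math::epsilon_difference(device[i], host[i]);
        if (eps > max_eps)
        {
            std::cerr << "Mismatch at element " << i << ": " << eps << " eps\n";
            ++failures;
        }
    }
    return failures;
}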
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_neumann(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::cyl_neumann(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    bool failed = false;
+    // check the results: output_vector holds the device (parallel) values,
+    // results holds the host (serial) reference values
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(output_vector[i]) && std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << output_vector[i]
+                          << "\n  Serial: " << results[i]
+                          << "\n    Dist: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                failed = true;
+            }
+        }
+    }
+
+    if (failed)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_cyl_neumann_float.cu b/test/test_cyl_neumann_float.cu
new file mode 100644
index 0000000000..f621d2fc65
--- /dev/null
+++ b/test/test_cyl_neumann_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
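// The .cu tests allocate through cuda_managed_ptr<T> from cuda_managed_ptr.hpp,
// a header this diff does not show. A minimal sketch of the kind of unified-
// memory wrapper those tests assume, using cudaMallocManaged so the same
// pointer is valid on host and device (hypothetical; the real header may
// differ, e.g. in error handling or construction semantics):

#include <cstddef>
#include <cuda_runtime.h>

template <typename T>
class managed_array   // hypothetical stand-in for cuda_managed_ptr<T>
{
    T* ptr_ = nullptr;
public:
    explicit managed_array(std::size_t n)
    {
        // Unified memory: pages migrate between host and device on demand.
        cudaMallocManaged(&ptr_, n * sizeof(T));
    }
    ~managed_array() { cudaFree(ptr_); }
    managed_array(const managed_array&) = delete;
    managed_array& operator=(const managed_array&) = delete;
    T* get() const { return ptr_; }
    T& operator[](std::size_t i) const { return ptr_[i]; }
};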
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::cyl_neumann(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::cyl_neumann(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_cyl_neumann_nvrtc_double.cpp b/test/test_cyl_neumann_nvrtc_double.cpp new file mode 100644 index 0000000000..78bbd3b5ca --- /dev/null +++ b/test/test_cyl_neumann_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
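// Every NVRTC kernel string in these tests is declared extern "C", so
// cuModuleGetFunction can look the kernel up by its unmangled source name;
// the nvrtcAddNameExpression calls are only strictly required for mangled
// C++ names. A sketch of the name-expression route that would be needed
// without extern "C" (illustration; not what these tests do as written):

#include <nvrtc.h>

// Must be paired with nvrtcAddNameExpression(prog, name_expression) called
// before nvrtcCompileProgram; the returned pointer is owned by `prog`.
const char* lowered_name(nvrtcProgram prog, const char* name_expression)
{
    const char* lowered = nullptr;
    nvrtcGetLoweredName(prog, name_expression, &lowered);
    return lowered;   // pass to cuModuleGetFunction in place of the source name
}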
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cyl_neumann_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::cyl_neumann(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_neumann_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cyl_neumann_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_neumann_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); 
+ h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::cyl_neumann(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_cyl_neumann_nvrtc_float.cpp b/test/test_cyl_neumann_nvrtc_float.cpp new file mode 100644 index 0000000000..78bbd3b5ca --- /dev/null +++ b/test/test_cyl_neumann_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
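// On compilation failure the tests fetch the NVRTC log into a raw new[]
// buffer and delete[] it by hand. A sketch of the same two calls wrapped so
// the buffer is released automatically (an alternative formulation, not the
// committed code):

#include <cstddef>
#include <string>
#include <nvrtc.h>

std::string nvrtc_log(nvrtcProgram prog)
{
    std::size_t size = 0;
    nvrtcGetProgramLogSize(prog, &size);
    std::string log(size, '\0');
    nvrtcGetProgramLog(prog, &log[0]);
    return log;
}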
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/bessel.hpp>
+extern "C" __global__
+void test_cyl_neumann_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::cyl_neumann(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_neumann_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_cyl_neumann_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_neumann_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::cyl_neumann(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_digamma_double.cu b/test/test_digamma_double.cu
new file mode 100644
index 0000000000..c88fe153c5
--- /dev/null
+++ b/test/test_digamma_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
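// checkCUDAError, checkCUError, and checkNVRTCError are repeated verbatim in
// every *_nvrtc_* test in this diff. A sketch of the same checks as one
// overload set that a shared test header could provide (a hypothetical
// refactoring; the diff as written keeps each test self-contained):

#include <cstdlib>
#include <iostream>
#include <cuda.h>
#include <cuda_runtime.h>
#include <nvrtc.h>

void check(cudaError_t r, const char* msg)
{
    if (r != cudaSuccess)
    {
        std::cerr << msg << ": " << cudaGetErrorString(r) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

void check(CUresult r, const char* msg)
{
    if (r != CUDA_SUCCESS)
    {
        const char* s = nullptr;
        cuGetErrorString(r, &s);
        std::cerr << msg << ": " << (s ? s : "unknown CUresult") << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

void check(nvrtcResult r, const char* msg)
{
    if (r != NVRTC_SUCCESS)
    {
        std::cerr << msg << ": " << nvrtcGetErrorString(r) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}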
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::digamma(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::digamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_digamma_float.cu b/test/test_digamma_float.cu new file mode 100644 index 0000000000..ea1b1c68e9 --- /dev/null +++ b/test/test_digamma_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
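// The `watch w;` timer in the .cu tests comes from stopwatch.hpp, which this
// diff does not include. A minimal std::chrono sketch with the interface the
// tests rely on, elapsed() in seconds and reset() (an assumption about the
// header, not its actual contents):

#include <chrono>

class watch_sketch
{
    std::chrono::steady_clock::time_point start_ = std::chrono::steady_clock::now();
public:
    double elapsed() const
    {
        return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
    }
    void reset() { start_ = std::chrono::steady_clock::now(); }
};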
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::digamma(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::digamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_digamma_nvrtc_double.cpp b/test/test_digamma_nvrtc_double.cpp new file mode 100644 index 0000000000..d3da101881 --- /dev/null +++ b/test/test_digamma_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_digamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::digamma(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_digamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_digamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_digamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::digamma(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_digamma_nvrtc_float.cpp b/test/test_digamma_nvrtc_float.cpp new file mode 100644 index 0000000000..a698cbd56d --- /dev/null +++ b/test/test_digamma_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_digamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::digamma(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_digamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_digamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_digamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::digamma(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_digamma_simple.cpp b/test/test_digamma_simple.cpp new file mode 100644 index 0000000000..bbe003a015 --- /dev/null +++ b/test/test_digamma_simple.cpp @@ -0,0 +1,50 @@ +// (C) Copyright John Maddock 2006. +// (C) Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <boost/math/special_functions/digamma.hpp>
+#include "math_unit_test.hpp"
+
+template <typename T>
+void test_spots(T, const char* t)
+{
+   std::cout << "Testing basic sanity checks for type " << t << std::endl;
+   //
+   // Basic sanity checks, tolerance is 3 epsilon:
+   //
+   T tolerance = 3;
+   //
+   // Special tolerance (200eps) for when we're very near the root,
+   // and T has more than 64 bits in its mantissa:
+   //
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(0.125)), static_cast<T>(-8.3884926632958548678027429230863430000514460424495L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(0.5)), static_cast<T>(-1.9635100260214234794409763329987555671931596046604L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(1)), static_cast<T>(-0.57721566490153286060651209008240243104215933593992L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(1.5)), static_cast<T>(0.036489973978576520559023667001244432806840395339566L), tolerance * 40);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(1.5) - static_cast<T>(1)/32), static_cast<T>(0.00686541147073577672813890866512415766586241385896200579891429L), tolerance * 200);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(2)), static_cast<T>(0.42278433509846713939348790991759756895784066406008L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(8)), static_cast<T>(2.0156414779556099965363450527747404261006978069172L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(12)), static_cast<T>(2.4426616799758120167383652547949424463027180089374L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(22)), static_cast<T>(3.0681430398611966699248760264450329818421699570581L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(50)), static_cast<T>(3.9019896734278921969539597028823666609284424880275L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(500)), static_cast<T>(6.2136077650889917423827750552855712637776544784569L), tolerance);
+   //
+   // negative values:
+   //
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(-0.125)), static_cast<T>(7.1959829284523046176757814502538535827603450463013L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(-10.125)), static_cast<T>(9.9480538258660761287008034071425343357982429855241L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(-10.875)), static_cast<T>(-5.1527360383841562620205965901515879492020193154231L), tolerance);
+   CHECK_ULP_CLOSE(::boost::math::digamma(static_cast<T>(-1.5)), static_cast<T>(0.70315664064524318722569033366791109947350706200623L), tolerance);
+}
+
+int main()
+{
+   test_spots(0.0F, "float");
+   test_spots(0.0, "double");
+
+   return boost::math::test::report_errors();
+}
+
+
diff --git a/test/test_ellint_1.cpp b/test/test_ellint_1.cpp
index b5cb2a359e..9366e4545a 100644
--- a/test/test_ellint_1.cpp
+++ b/test/test_ellint_1.cpp
@@ -6,7 +6,14 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#endif
+
 #include "test_ellint_1.hpp"
 
 //
diff --git a/test/test_ellint_1.hpp b/test/test_ellint_1.hpp
index 635bcf2293..786841302c 100644
--- a/test/test_ellint_1.hpp
+++ b/test/test_ellint_1.hpp
@@ -9,11 +9,15 @@
 // Constants are too big for float case, but this doesn't matter for test.
#endif +#include +#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS #include +#endif #define BOOST_TEST_MAIN #include #include #include +#include #include #include "functor.hpp" @@ -139,11 +143,13 @@ void test_spots(T, const char* type_name) // // Test error handling: // + #ifndef BOOST_MATH_NO_EXCEPTIONS BOOST_CHECK_GE(boost::math::ellint_1(T(1)), boost::math::tools::max_value()); BOOST_CHECK_GE(boost::math::ellint_1(T(-1)), boost::math::tools::max_value()); BOOST_CHECK_THROW(boost::math::ellint_1(T(1.0001)), std::domain_error); BOOST_CHECK_THROW(boost::math::ellint_1(T(-1.0001)), std::domain_error); BOOST_CHECK_THROW(boost::math::ellint_1(T(2.2), T(0.5)), std::domain_error); BOOST_CHECK_THROW(boost::math::ellint_1(T(-2.2), T(0.5)), std::domain_error); + #endif } diff --git a/test/test_ellint_1_double.cu b/test/test_ellint_1_double.cu new file mode 100644 index 0000000000..eb9bfb162d --- /dev/null +++ b/test/test_ellint_1_double.cu @@ -0,0 +1,106 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_ellint_1_float.cu b/test/test_ellint_1_float.cu new file mode 100644 index 0000000000..8de959d225 --- /dev/null +++ b/test/test_ellint_1_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ellint_1(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ellint_1(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_ellint_1_nvrtc_double.cpp b/test/test_ellint_1_nvrtc_double.cpp new file mode 100644 index 0000000000..fac5da55f0 --- /dev/null +++ b/test/test_ellint_1_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
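// The CI compile options above pin --gpu-architecture=compute_75 (Turing-class
// PTX). A sketch of deriving that option from whatever device is actually
// present, via the driver API attribute queries (illustration only; the tests
// keep the flag fixed):

#include <cstdio>
#include <cstddef>
#include <cuda.h>

void arch_option(CUdevice dev, char* buf, std::size_t len)
{
    int major = 0, minor = 0;
    cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
    cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
    std::snprintf(buf, len, "--gpu-architecture=compute_%d%d", major, minor);
}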
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_ellint_1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ellint_1(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ellint_1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_1_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_1_nvrtc_float.cpp b/test/test_ellint_1_nvrtc_float.cpp
new file mode 100644
index 0000000000..fac5da55f0
--- /dev/null
+++ b/test/test_ellint_1_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/ellint_1.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/ellint_1.hpp>
+extern "C" __global__
+void test_ellint_1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_1(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_2.cpp b/test/test_ellint_2.cpp
index ca3e994d4d..0da012c133 100644
--- a/test/test_ellint_2.cpp
+++ b/test/test_ellint_2.cpp
@@ -6,7 +6,10 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
 #include "test_ellint_2.hpp"
 
 //
@@ -72,7 +75,11 @@ void expected_results()
       ".*",                          // platform
       largest_type,                  // test type(s)
       ".*",                          // test data group
+      #ifdef SYCL_LANGUAGE_VERSION
+      ".*", 20, 6);                  // test function
+      #else
       ".*", 15, 6);                  // test function
+      #endif
    add_expected_result(
       ".*",                          // compiler
       ".*",                          // stdlib
diff --git a/test/test_ellint_2.hpp b/test/test_ellint_2.hpp
index e38f94d984..29a73c9961 100644
--- a/test/test_ellint_2.hpp
+++ b/test/test_ellint_2.hpp
@@ -9,11 +9,18 @@
 // Constants are too big for float case, but this doesn't matter for test.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp>
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/special_functions/ellint_2.hpp>
+#include
+#include
 #include <boost/array.hpp>
 #include "functor.hpp"
@@ -157,10 +164,12 @@ void test_spots(T, const char* type_name)
    //
    // Test error handling:
    //
+   #ifndef BOOST_MATH_NO_EXCEPTIONS
    BOOST_CHECK_EQUAL(boost::math::ellint_2(T(1)), T(1));
    BOOST_CHECK_EQUAL(boost::math::ellint_2(T(-1)), T(1));
    BOOST_CHECK_THROW(boost::math::ellint_2(T(1.5)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::ellint_2(T(-1.5)), std::domain_error);
    BOOST_CHECK_THROW(boost::math::ellint_2(T(1.5), T(1.5)), std::domain_error);
+   #endif
 }
diff --git a/test/test_ellint_2_double.cu b/test/test_ellint_2_double.cu
new file mode 100644
index 0000000000..2e1073576e
--- /dev/null
+++ b/test/test_ellint_2_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/ellint_2.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_2(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ellint_2(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
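+        // The comparison below is in multiples of machine epsilon; 10 eps is assumed
+        // to be a tight enough bound when host and device compute at the same precision.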
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_ellint_2_float.cu b/test/test_ellint_2_float.cu
new file mode 100644
index 0000000000..a55a6d1ad4
--- /dev/null
+++ b/test/test_ellint_2_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/ellint_2.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_2(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ellint_2(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_ellint_2_nvrtc_double.cpp b/test/test_ellint_2_nvrtc_double.cpp
new file mode 100644
index 0000000000..dd2eef1547
--- /dev/null
+++ b/test/test_ellint_2_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/ellint_2.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/ellint_2.hpp>
+extern "C" __global__
+void test_ellint_2_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_2(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_2_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_2_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_2_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_2(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_2_nvrtc_float.cpp b/test/test_ellint_2_nvrtc_float.cpp
new file mode 100644
index 0000000000..dd2eef1547
--- /dev/null
+++ b/test/test_ellint_2_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/ellint_2.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/ellint_2.hpp>
+extern "C" __global__
+void test_ellint_2_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_2(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_2_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_2_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_2_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_2(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_3_double.cu b/test/test_ellint_3_double.cu
new file mode 100644
index 0000000000..979e01ff18
--- /dev/null
+++ b/test/test_ellint_3_double.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/ellint_3.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_3(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ellint_3(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_ellint_3_float.cu b/test/test_ellint_3_float.cu
new file mode 100644
index 0000000000..979e01ff18
--- /dev/null
+++ b/test/test_ellint_3_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/ellint_3.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_3(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ellint_3(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_ellint_3_nvrtc_double.cpp b/test/test_ellint_3_nvrtc_double.cpp
new file mode 100644
index 0000000000..dacab66192
--- /dev/null
+++ b/test/test_ellint_3_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/ellint_3.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/ellint_3.hpp>
+extern "C" __global__
+void test_ellint_3_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_3(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_3_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_3_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_3_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_3(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_3_nvrtc_float.cpp b/test/test_ellint_3_nvrtc_float.cpp
new file mode 100644
index 0000000000..72b2ec71e7
--- /dev/null
+++ b/test/test_ellint_3_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/ellint_3.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/ellint_3.hpp>
+extern "C" __global__
+void test_ellint_3_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_3(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_3_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_3_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_3_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_3(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_d.cpp b/test/test_ellint_d.cpp
index 5e76a49fb6..420bc0c022 100644
--- a/test/test_ellint_d.cpp
+++ b/test/test_ellint_d.cpp
@@ -4,7 +4,10 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
 #include "test_ellint_d.hpp"
 
 //
diff --git a/test/test_ellint_d.hpp b/test/test_ellint_d.hpp
index de53936f1f..c33a4d942a 100644
--- a/test/test_ellint_d.hpp
+++ b/test/test_ellint_d.hpp
@@ -8,11 +8,17 @@
 // Constants are too big for float case, but this doesn't matter for test.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp>
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/special_functions/ellint_d.hpp>
+#include
 #include <boost/array.hpp>
 #include "functor.hpp"
@@ -117,6 +123,7 @@ void test_spots(T, const char* type_name)
    do_test_ellint_d1<T>(ellint_d_data, type_name, "Elliptic Integral D: Random Data");
 
+   #ifndef BOOST_MATH_NO_EXCEPTIONS
    BOOST_MATH_CHECK_THROW(boost::math::ellint_d(T(1)), std::domain_error);
    BOOST_MATH_CHECK_THROW(boost::math::ellint_d(T(-1)), std::domain_error);
    BOOST_MATH_CHECK_THROW(boost::math::ellint_d(T(1.5)), std::domain_error);
@@ -126,5 +133,6 @@ void test_spots(T, const char* type_name)
       BOOST_CHECK_EQUAL(boost::math::ellint_d(T(0.5), std::numeric_limits<T>::infinity()), std::numeric_limits<T>::infinity());
    }
    BOOST_MATH_CHECK_THROW(boost::math::ellint_d(T(1.5), T(1.0)), std::domain_error);
+   #endif
 }
diff --git a/test/test_ellint_d_double.cu b/test/test_ellint_d_double.cu
new file mode 100644
index 0000000000..979e01ff18
--- /dev/null
+++ b/test/test_ellint_d_double.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/ellint_d.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_d(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
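+    // The launch above is asynchronous; after cudaDeviceSynchronize() any launch
+    // failure should be reported by cudaGetLastError() below.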
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ellint_d(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_ellint_d_float.cu b/test/test_ellint_d_float.cu
new file mode 100644
index 0000000000..50882aa76a
--- /dev/null
+++ b/test/test_ellint_d_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/ellint_d.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_d(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ellint_d(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_ellint_d_nvrtc_double.cpp b/test/test_ellint_d_nvrtc_double.cpp
new file mode 100644
index 0000000000..cb65a2e731
--- /dev/null
+++ b/test/test_ellint_d_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/ellint_d.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/ellint_d.hpp>
+extern "C" __global__
+void test_ellint_d_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_d(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_d_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_d_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
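+            // NVRTC's reported log size is assumed to include the terminating NUL,
+            // so the buffer allocated below holds the complete log.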
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_d_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_d(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ellint_d_nvrtc_float.cpp b/test/test_ellint_d_nvrtc_float.cpp
new file mode 100644
index 0000000000..727d9dcd17
--- /dev/null
+++ b/test/test_ellint_d_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/ellint_d.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/ellint_d.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_ellint_d_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ellint_d(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ellint_d_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ellint_d_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ellint_d_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
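+        // Allocate the device-side buffers, checking every CUDA call so a
+        // failure points at the first bad allocation or copy.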
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ellint_d(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_erf.cpp b/test/test_erf.cpp
index 5359039834..2232c1c759 100644
--- a/test/test_erf.cpp
+++ b/test/test_erf.cpp
@@ -4,7 +4,20 @@
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#include <boost/math/special_functions/erf.hpp>
 #include "test_erf.hpp"
 
 //
diff --git a/test/test_erf.hpp b/test/test_erf.hpp
index dc42c81248..b70c739530 100644
--- a/test/test_erf.hpp
+++ b/test/test_erf.hpp
@@ -1,9 +1,11 @@
-// Copyright John Maddock 2006.
-// Copyright Paul A. Bristow 2007, 2009
+// Copyright John Maddock 2006.
+// Copyright Paul A. Bristow 2007, 2009
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#include <boost/math/tools/config.hpp>
 #include <boost/math/concepts/real_concept.hpp>
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
@@ -21,6 +23,11 @@
 #define SC_(x) static_cast<typename table_type<T>::type>(BOOST_JOIN(x, L))
 #endif
 
+#ifdef BOOST_MATH_NO_EXCEPTIONS
+# undef BOOST_CHECK_THROW
+# define BOOST_CHECK_THROW(x, y)
+#endif
+
 template <class Real, class T>
 void do_test_erf(const T& data, const char* type_name, const char* test_name)
 {
diff --git a/test/test_erf_double.cu b/test/test_erf_double.cu
new file mode 100644
index 0000000000..3e8398262a
--- /dev/null
+++ b/test/test_erf_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erf(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erf_float.cu b/test/test_erf_float.cu
new file mode 100644
index 0000000000..6cbd07e6ae
--- /dev/null
+++ b/test/test_erf_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erf(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erf_inv_double.cu b/test/test_erf_inv_double.cu
new file mode 100644
index 0000000000..f540babbb2
--- /dev/null
+++ b/test/test_erf_inv_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf_inv(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erf_inv(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erf_inv_float.cu b/test/test_erf_inv_float.cu
new file mode 100644
index 0000000000..d9f37687f1
--- /dev/null
+++ b/test/test_erf_inv_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf_inv(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erf_inv(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erf_inv_nvrtc_double.cpp b/test/test_erf_inv_nvrtc_double.cpp
new file mode 100644
index 0000000000..5588b76689
--- /dev/null
+++ b/test/test_erf_inv_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/erf.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/erf.hpp>
+extern "C" __global__
+void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf_inv(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_erf_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
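+        // erf_inv is only defined on (-1, 1), so inputs drawn from [0, 1)
+        // keep every device call in-domain.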
for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erf_inv(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erf_inv_nvrtc_float.cpp b/test/test_erf_inv_nvrtc_float.cpp new file mode 100644 index 0000000000..ff7f6db98b --- /dev/null +++ b/test/test_erf_inv_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/erf.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/erf.hpp>
+extern "C" __global__
+void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf_inv(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_erf_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erf_inv(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erf_nvrtc_double.cpp b/test/test_erf_nvrtc_double.cpp new file mode 100644 index 0000000000..e20d0188d0 --- /dev/null +++ b/test/test_erf_nvrtc_double.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/erf.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/erf.hpp>
+extern "C" __global__
+void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_erf_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
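+        // h_in2/d_in2 are never read by erf itself; the second input keeps
+        // the kernel signature and cuLaunchKernel argument list identical
+        // across these NVRTC tests.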
d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erf(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erf_nvrtc_float.cpp b/test/test_erf_nvrtc_float.cpp new file mode 100644 index 0000000000..913b1a14cc --- /dev/null +++ b/test/test_erf_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/erf.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <random>
+
+typedef float float_type;
+
+// Note: the kernel must use the same float_type as the host buffers,
+// otherwise the double results would be written into float storage.
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/erf.hpp>
+extern "C" __global__
+void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::erf(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_erf_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erf(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erfc_double.cu b/test/test_erfc_double.cu new file mode 100644 index 0000000000..86d3c6e5b4 --- /dev/null +++ b/test/test_erfc_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erfc(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erfc(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erfc_float.cu b/test/test_erfc_float.cu
new file mode 100644
index 0000000000..7970063a47
--- /dev/null
+++ b/test/test_erfc_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erfc(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erfc(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erfc_inv_double.cu b/test/test_erfc_inv_double.cu
new file mode 100644
index 0000000000..68642cd109
--- /dev/null
+++ b/test/test_erfc_inv_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erfc_inv(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erfc_inv(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erfc_inv_float.cu b/test/test_erfc_inv_float.cu
new file mode 100644
index 0000000000..b5b72cd057
--- /dev/null
+++ b/test/test_erfc_inv_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/erf.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::erfc_inv(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vector
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::erfc_inv(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_erfc_inv_nvrtc_double.cpp b/test/test_erfc_inv_nvrtc_double.cpp
new file mode 100644
index 0000000000..ae961d657e
--- /dev/null
+++ b/test/test_erfc_inv_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/erf.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/erf.hpp>
+extern "C" __global__
+void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::erfc_inv(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_erf_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
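+        // erfc_inv takes arguments in [0, 2]; an argument of exactly 0
+        // overflows, which the ignore_error policy above maps to a
+        // non-finite value that the verification loop then skips.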
memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erfc_inv(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erfc_inv_nvrtc_float.cpp b/test/test_erfc_inv_nvrtc_float.cpp new file mode 100644 index 0000000000..b676330ceb --- /dev/null +++ b/test/test_erfc_inv_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +extern "C" __global__ +void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::erfc_inv(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_erf_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory 
for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erfc_inv(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erfc_nvrtc_double.cpp b/test/test_erfc_nvrtc_double.cpp new file mode 100644 index 0000000000..c43a469acf --- /dev/null +++ b/test/test_erfc_nvrtc_double.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +extern "C" __global__ +void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::erfc(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_erf_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory 
for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erfc(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_erfc_nvrtc_float.cpp b/test/test_erfc_nvrtc_float.cpp new file mode 100644 index 0000000000..f8756045a8 --- /dev/null +++ b/test/test_erfc_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +extern "C" __global__ +void test_erf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::erfc(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_erf_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_erf_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_erf_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for 
d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::erfc(h_in1[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_exp_sinh_quad_double.cu b/test/test_exp_sinh_quad_double.cu new file mode 100644 index 0000000000..59f6d8a12f --- /dev/null +++ b/test/test_exp_sinh_quad_double.cu @@ -0,0 +1,133 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <limits>
+#include <boost/math/quadrature/exp_sinh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+__host__ __device__ float_type func(float_type x)
+{
+    BOOST_MATH_STD_USING
+    return 1/(1+x*x);
+}
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::size_t levels;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::quadrature::exp_sinh<float_type> integrator;
+    for(int i = 0; i < numElements; ++i)
+    {
+        results.push_back(integrator.integrate(func, tol, &error, &L1));
+    }
+    double t = w.elapsed();
+    // check the results
+    int failed_count = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
+        if (eps > 10)
+        {
+            std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
+                      << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i]
+                      << "\n  Host: " << results[i]
+                      << "\n   Eps: " << eps << "\n";
+            failed_count++;
+        }
+        if (failed_count > 100)
+        {
+            break;
+        }
+    }
+
+    if (failed_count != 0)
+    {
+        std::cout << "Test FAILED" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_exp_sinh_quad_float.cu b/test/test_exp_sinh_quad_float.cu
new file mode 100644
index 0000000000..1472dbcde8
--- /dev/null
+++ b/test/test_exp_sinh_quad_float.cu
@@ -0,0 +1,133 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
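Both of these quadrature tests, like the other .cu tests in this patch, lean on two support headers that are not part of the diff: cuda_managed_ptr.hpp and stopwatch.hpp. From the usage above (a count constructor, get(), and operator[] touched from host code before and after the kernel runs), cuda_managed_ptr is evidently a small RAII wrapper over CUDA unified memory. A plausible minimal reconstruction, offered only as a reading aid and assuming cudaMallocManaged-backed storage:

    #include <cuda_runtime.h>
    #include <cstddef>

    // Hypothetical sketch of cuda_managed_ptr.hpp; the real header is not shown in this diff.
    template <typename T>
    class cuda_managed_ptr
    {
        T* ptr_ = nullptr;
    public:
        explicit cuda_managed_ptr(std::size_t n)
        {
            // Unified memory is addressable from both host and device,
            // which is why main() can fill input_vector before the launch
            // and read output_vector after cudaDeviceSynchronize().
            cudaMallocManaged(&ptr_, n * sizeof(T));
        }
        ~cuda_managed_ptr() { cudaFree(ptr_); }
        cuda_managed_ptr(const cuda_managed_ptr&) = delete;
        cuda_managed_ptr& operator=(const cuda_managed_ptr&) = delete;
        T* get() const { return ptr_; }
        T& operator[](std::size_t i) { return ptr_[i]; }
        const T& operator[](std::size_t i) const { return ptr_[i]; }
    };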
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <limits>
+#include <boost/math/quadrature/exp_sinh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+__host__ __device__ float_type func(float_type x)
+{
+    BOOST_MATH_STD_USING
+    return 1/(1+x*x);
+}
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::size_t levels;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!"
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::exp_sinh integrator; + for(int i = 0; i < numElements; ++i) + { + results.push_back(integrator.integrate(func, tol, &error, &L1)); + } + double t = w.elapsed(); + // check the results + int failed_count = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]); + if (eps > 10) + { + std::cerr << std::setprecision(std::numeric_limits::digits10) + << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i] + << "\n Host: " << results[i] + << "\n Eps: " << eps << "\n"; + failed_count++; + } + if (failed_count > 100) + { + break; + } + } + + if (failed_count != 0) + { + std::cout << "Test FAILED" << std::endl; + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_exp_sinh_quad_nvrtc_double.cpp b/test/test_exp_sinh_quad_nvrtc_double.cpp new file mode 100644 index 0000000000..bfd5080928 --- /dev/null +++ b/test/test_exp_sinh_quad_nvrtc_double.cpp @@ -0,0 +1,206 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +extern "C" __global__ +void test_expm1_kernel(const float_type*, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels); + } +} +)"; + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = 
nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_expm1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function"); + + int numElements = 50000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::exp_sinh integrator; + for (int i = 0; i < numElements; ++i) + { + auto res = integrator.integrate(func, tol, &error, &L1); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] 
h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_exp_sinh_quad_nvrtc_float.cpp b/test/test_exp_sinh_quad_nvrtc_float.cpp new file mode 100644 index 0000000000..b472e5597c --- /dev/null +++ b/test/test_exp_sinh_quad_nvrtc_float.cpp @@ -0,0 +1,206 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +extern "C" __global__ +void test_expm1_kernel(const float_type*, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::size_t levels; + + if (i < numElements) + { + out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels); + } +} +)"; + +__host__ __device__ float_type func(float_type x) +{ + return 1/(1+x*x); +} + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_expm1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation 
failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function"); + + int numElements = 50000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::exp_sinh integrator; + for (int i = 0; i < numElements; ++i) + { + auto res = integrator.integrate(func, tol, &error, &L1); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_expint.cpp b/test/test_expint.cpp index 3f44a80915..3eede5e389 100644 --- a/test/test_expint.cpp +++ b/test/test_expint.cpp @@ -3,7 +3,14 @@ // Boost Software License, Version 1.0. 
(See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#endif
+
 #include "test_expint.hpp"
 
 //
@@ -78,7 +85,11 @@ void expected_results()
       ".*",                          // platform
       "float|double|long double",    // test type(s)
       ".*Ei.*",                      // test data group
+      #ifndef SYCL_LANGUAGE_VERSION
       ".*", 6, 3);                   // test function
+      #else
+      ".*", 10, 3);
+      #endif
    if(std::numeric_limits<long double>::digits > 100)
    {
       add_expected_result(
diff --git a/test/test_expint.hpp b/test/test_expint.hpp
index 491db2fcdc..d6524810e4 100644
--- a/test/test_expint.hpp
+++ b/test/test_expint.hpp
@@ -4,13 +4,19 @@
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp>
+#endif
+
 #include
+#include
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include
 #include
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include
 #include
 #include
diff --git a/test/test_expint_double.cu b/test/test_expint_double.cu
new file mode 100644
index 0000000000..d82e90a937
--- /dev/null
+++ b/test/test_expint_double.cu
@@ -0,0 +1,106 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <random>
+#include <boost/math/special_functions/expint.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::expint(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    std::mt19937_64 rng(42);
+    std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = dist(rng);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!"
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::expint(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_expint_float.cu b/test/test_expint_float.cu new file mode 100644 index 0000000000..dd1fccd1d7 --- /dev/null +++ b/test/test_expint_float.cu @@ -0,0 +1,106 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::expint(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::expint(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_expint_nvrtc_double.cpp b/test/test_expint_nvrtc_double.cpp new file mode 100644 index 0000000000..3ab45e6a1a --- /dev/null +++ b/test/test_expint_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_expint_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::expint(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expint_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_expint_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, 
module, "test_expint_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::expint(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_expint_nvrtc_float.cpp b/test/test_expint_nvrtc_float.cpp new file mode 100644 index 0000000000..bff58580eb --- /dev/null +++ b/test/test_expint_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_expint_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::expint(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expint_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_expint_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_expint_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } 
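// Note (editorial sketch, not part of the patch): the cudaMalloc/cudaMemcpy
// pairs that follow are repeated verbatim in every NVRTC test in this diff.
// A small helper built only from the runtime API calls already used here
// could fuse the two steps, e.g.:
//
//     template <typename T>
//     T* make_device_copy(const T* host, int n)
//     {
//         T* dev = nullptr;
//         checkCUDAError(cudaMalloc(&dev, n * sizeof(T)), "cudaMalloc failed");
//         checkCUDAError(cudaMemcpy(dev, host, n * sizeof(T), cudaMemcpyHostToDevice), "cudaMemcpy failed");
//         return dev;
//     }
//
// Also note that the second host array (h_in2) is staged to the device even
// though the kernel's second parameter is unnamed and never read.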
+ + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::expint(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_expm1_double.cu b/test/test_expm1_double.cu new file mode 100644 index 0000000000..cfed7d840d --- /dev/null +++ b/test/test_expm1_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/expm1.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::expm1(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::expm1(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_expm1_float.cu b/test/test_expm1_float.cu
new file mode 100644
index 0000000000..3d439b8872
--- /dev/null
+++ b/test/test_expm1_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
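A side note on input generation: the two expm1 tests seed their inputs with rand()/RAND_MAX, while every other test in this diff uses a seeded std::mt19937_64 with std::uniform_real_distribution. If the expm1 tests were brought in line with the rest of the suite, the init loop would look like the following sketch (same engine and seed as the NVRTC tests above; an illustration, not part of the patch):

    #include <random>

    // Deterministic inputs in [0, 1), reproducible across runs.
    std::mt19937_64 rng(42);
    std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
    for (int i = 0; i < numElements; ++i)
    {
        input_vector[i] = dist(rng);
    }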
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/expm1.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::expm1(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::expm1(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_expm1_nvrtc_double.cpp b/test/test_expm1_nvrtc_double.cpp
new file mode 100644
index 0000000000..ea496b73a7
--- /dev/null
+++ b/test/test_expm1_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/expm1.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/expm1.hpp>
+extern "C" __global__
+void test_expm1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::expm1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_expm1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::expm1(h_in1[i]);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_expm1_nvrtc_float.cpp b/test/test_expm1_nvrtc_float.cpp
new file mode 100644
index 0000000000..16d8636db5
--- /dev/null
+++ b/test/test_expm1_nvrtc_float.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/expm1.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/expm1.hpp>
+extern "C" __global__
+void test_expm1_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::expm1(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_expm1_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_expm1_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_expm1_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::expm1(h_in1[i]);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_expm1_simple.cpp b/test/test_expm1_simple.cpp
new file mode 100644
index 0000000000..00513ea409
--- /dev/null
+++ b/test/test_expm1_simple.cpp
@@ -0,0 +1,32 @@
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/expm1.hpp>
+#include <cmath>
+#include <random>
+#include "math_unit_test.hpp"
+
+constexpr int N = 50000;
+
+template <typename T>
+void test()
+{
+    std::mt19937_64 rng(42);
+    std::uniform_real_distribution<T> dist(0, 0.01);
+
+    for (int n = 0; n < N; ++n)
+    {
+        const T value (dist(rng));
+        CHECK_ULP_CLOSE(std::expm1(value), boost::math::expm1(value), 10);
+    }
+}
+
+int main()
+{
+    test<float>();
+    test<double>();
+
+    return boost::math::test::report_errors();
+}
diff --git a/test/test_exponential_cdf_double.cu b/test/test_exponential_cdf_double.cu
new file mode 100644
index 0000000000..e3a57e86ec
--- /dev/null
+++ b/test/test_exponential_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::exponential_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_exponential_cdf_float.cu b/test/test_exponential_cdf_float.cu
new file mode 100644
index 0000000000..ed214a4953
--- /dev/null
+++ b/test/test_exponential_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::exponential_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_exponential_cdf_nvrtc_double.cpp b/test/test_exponential_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..42849f8abe
--- /dev/null
+++ b/test/test_exponential_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_exponential_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_exponential_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_exponential_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_exponential_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::exponential_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_exponential_cdf_nvrtc_float.cpp b/test/test_exponential_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..9417abf026
--- /dev/null
+++ b/test/test_exponential_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_exponential_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_exponential_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_exponential_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_exponential_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::exponential_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_exponential_dist.cpp b/test/test_exponential_dist.cpp
index d1898fa7c4..1b21df1411 100644
--- a/test/test_exponential_dist.cpp
+++ b/test/test_exponential_dist.cpp
@@ -8,7 +8,7 @@
 // test_exponential_dist.cpp
 
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 #include <boost/math/distributions/exponential.hpp>
 using boost::math::exponential_distribution;
diff --git a/test/test_exponential_pdf_double.cu b/test/test_exponential_pdf_double.cu
new file mode 100644
index 0000000000..530b1023b3
--- /dev/null
+++ b/test/test_exponential_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::exponential_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_exponential_pdf_float.cu b/test/test_exponential_pdf_float.cu
new file mode 100644
index 0000000000..0801e2d0be
--- /dev/null
+++ b/test/test_exponential_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::exponential_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_exponential_pdf_nvrtc_double.cpp b/test/test_exponential_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..2c52a4b38e
--- /dev/null
+++ b/test/test_exponential_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_exponential_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_exponential_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_exponential_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_exponential_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::exponential_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_exponential_pdf_nvrtc_float.cpp b/test/test_exponential_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..ef1aff3f3c
--- /dev/null
+++ b/test/test_exponential_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_exponential_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_exponential_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_exponential_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_exponential_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::exponential_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_exponential_quan_double.cu b/test/test_exponential_quan_double.cu
new file mode 100644
index 0000000000..f4eb4c3b18
--- /dev/null
+++ b/test/test_exponential_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::exponential_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_exponential_quan_float.cu b/test/test_exponential_quan_float.cu
new file mode 100644
index 0000000000..f4eb4c3b18
--- /dev/null
+++ b/test/test_exponential_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/exponential.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::exponential_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::exponential_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_exponential_quan_nvrtc_double.cpp b/test/test_exponential_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..b05f77ffef
--- /dev/null
+++ b/test/test_exponential_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_exponential_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::exponential_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_exponential_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_exponential_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_exponential_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::exponential_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_exponential_quan_nvrtc_float.cpp b/test/test_exponential_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..fb932c5d7c --- /dev/null +++ b/test/test_exponential_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_exponential_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::exponential_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_exponential_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_exponential_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_exponential_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::exponential_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_exponential_range_support_double.cu b/test/test_exponential_range_support_double.cu new file mode 100644 index 0000000000..c19497ed50 --- /dev/null +++ b/test/test_exponential_range_support_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type* in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = support(boost::math::exponential_distribution(in1[i])).second; + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(support(boost::math::exponential_distribution(input_vector1[i])).second); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_exponential_range_support_float.cu b/test/test_exponential_range_support_float.cu new file mode 100644 index 0000000000..a111090de5 --- /dev/null +++ b/test/test_exponential_range_support_float.cu @@ -0,0 +1,111 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type* in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = range(boost::math::exponential_distribution(in1[i])).first; + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(range(boost::math::exponential_distribution(input_vector1[i])).first); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + std::cerr << "Device got: " << output_vector[i] << ", and serial got: " << results[i] << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_extreme_value.cpp b/test/test_extreme_value.cpp index fd8d928630..cb758e8f5b 100644 --- a/test/test_extreme_value.cpp +++ b/test/test_extreme_value.cpp @@ -1,5 +1,5 @@ // Copyright John Maddock 2006. - +// Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. 
// (See accompanying file LICENSE_1_0.txt @@ -7,7 +7,7 @@ // test_extreme_value.cpp -#include +#include "../include_private/boost/math/tools/test.hpp" #include // for real_concept #include using boost::math::extreme_value_distribution; diff --git a/test/test_extreme_value_cdf_double.cu b/test/test_extreme_value_cdf_double.cu new file mode 100644 index 0000000000..7ca0003482 --- /dev/null +++ b/test/test_extreme_value_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::extreme_value_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::extreme_value_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_extreme_value_cdf_float.cu b/test/test_extreme_value_cdf_float.cu new file mode 100644 index 0000000000..bc3ead6ebb --- /dev/null +++ b/test/test_extreme_value_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::extreme_value_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::extreme_value_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_extreme_value_cdf_nvrtc_double.cpp b/test/test_extreme_value_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..b3600b21bf --- /dev/null +++ b/test/test_extreme_value_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_extreme_value_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::extreme_value_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_extreme_value_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_extreme_value_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + 
size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_extreme_value_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::extreme_value_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_extreme_value_cdf_nvrtc_float.cpp b/test/test_extreme_value_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..b3600b21bf --- /dev/null +++ b/test/test_extreme_value_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_extreme_value_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::extreme_value_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_extreme_value_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_extreme_value_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_extreme_value_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] 
= static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::extreme_value_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_extreme_value_pdf_double.cu b/test/test_extreme_value_pdf_double.cu new file mode 100644 index 0000000000..44ccc5b716 --- /dev/null +++ b/test/test_extreme_value_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::extreme_value_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::extreme_value_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_extreme_value_pdf_float.cu b/test/test_extreme_value_pdf_float.cu new file mode 100644 index 0000000000..390622f400 --- /dev/null +++ b/test/test_extreme_value_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::extreme_value_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::extreme_value_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_extreme_value_pdf_nvrtc_double.cpp b/test/test_extreme_value_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..049f45d890 --- /dev/null +++ b/test/test_extreme_value_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_extreme_value_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::extreme_value_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_extreme_value_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_extreme_value_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_extreme_value_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] 
= static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::extreme_value_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_extreme_value_pdf_nvrtc_float.cpp b/test/test_extreme_value_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..adbd263489 --- /dev/null +++ b/test/test_extreme_value_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_extreme_value_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::extreme_value_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_extreme_value_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_extreme_value_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_extreme_value_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::extreme_value_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_extreme_value_quan_double.cu b/test/test_extreme_value_quan_double.cu new file mode 100644 index 0000000000..41f2f69a68 --- /dev/null +++ b/test/test_extreme_value_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::extreme_value_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::extreme_value_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 1000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_extreme_value_quan_float.cu b/test/test_extreme_value_quan_float.cu new file mode 100644 index 0000000000..5fe16e9a8c --- /dev/null +++ b/test/test_extreme_value_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
diff --git a/test/test_extreme_value_quan_float.cu b/test/test_extreme_value_quan_float.cu
new file mode 100644
index 0000000000..5fe16e9a8c
--- /dev/null
+++ b/test/test_extreme_value_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.  (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/distributions/extreme_value.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::extreme_value_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the quantile CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::extreme_value_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 2000.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_extreme_value_quan_nvrtc_double.cpp b/test/test_extreme_value_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..239df99949
--- /dev/null
+++ b/test/test_extreme_value_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_extreme_value_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::extreme_value_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_extreme_value_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_extreme_value_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_extreme_value_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + 
h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::extreme_value_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_extreme_value_quan_nvrtc_float.cpp b/test/test_extreme_value_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..fc9d8c5f41 --- /dev/null +++ b/test/test_extreme_value_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_extreme_value_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::extreme_value_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_extreme_value_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_extreme_value_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_extreme_value_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + 
h_in1[i] = static_cast<float_type>(dist(rng));
+        h_in2[i] = static_cast<float_type>(dist(rng));
+    }
+
+    checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+    checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+    checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+    checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+    checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+    int blockSize = 256;
+    int numBlocks = (numElements + blockSize - 1) / blockSize;
+    void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+    checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+    checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+    // Verify Result
+    for (int i = 0; i < numElements; ++i)
+    {
+        auto res = quantile(boost::math::extreme_value_distribution<float_type>(), h_in1[i]);
+
+        if (boost::math::isfinite(res))
+        {
+            if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << h_out[i]
+                          << "\n  Serial: " << res
+                          << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+            }
+        }
+    }
+
+    cudaFree(d_in1);
+    cudaFree(d_in2);
+    cudaFree(d_out);
+    delete[] h_in1;
+    delete[] h_in2;
+    delete[] h_out;
+
+    nvrtcDestroyProgram(&prog);
+    delete[] ptx;
+
+    cuCtxDestroy(context);
+
+    std::cout << "Kernel executed successfully." << std::endl;
+    return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_fisher_f.cpp b/test/test_fisher_f.cpp
index c18ed8ff1c..f142a33273 100644
--- a/test/test_fisher_f.cpp
+++ b/test/test_fisher_f.cpp
@@ -8,9 +8,13 @@
 // (See accompanying file LICENSE_1_0.txt
 // or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#include <boost/math/tools/test.hpp>
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 using ::boost::math::concepts::real_concept;
+#endif
 
 #include <boost/math/distributions/fisher_f.hpp> // for fisher_f_distribution
 using boost::math::fisher_f_distribution;
diff --git a/test/test_fisher_f_cdf_double.cu b/test/test_fisher_f_cdf_double.cu
new file mode 100644
index 0000000000..c6d6f0a94c
--- /dev/null
+++ b/test/test_fisher_f_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::fisher_f_distribution(1, 1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_fisher_f_cdf_float.cu b/test/test_fisher_f_cdf_float.cu new file mode 100644 index 0000000000..9df1bc8695 --- /dev/null +++ b/test/test_fisher_f_cdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::fisher_f_distribution(1, 1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_fisher_f_cdf_nvrtc_double.cpp b/test/test_fisher_f_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..1eb9cb00f6 --- /dev/null +++ b/test/test_fisher_f_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_fisher_f_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::fisher_f_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_fisher_f_cdf_nvrtc_float.cpp b/test/test_fisher_f_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..244190cf19 --- /dev/null +++ b/test/test_fisher_f_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_fisher_f_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::fisher_f_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_fisher_f_pdf_double.cu b/test/test_fisher_f_pdf_double.cu new file mode 100644 index 0000000000..77a3b655ab --- /dev/null +++ b/test/test_fisher_f_pdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::fisher_f_distribution(1, 1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_fisher_f_pdf_float.cu b/test/test_fisher_f_pdf_float.cu new file mode 100644 index 0000000000..323edf3424 --- /dev/null +++ b/test/test_fisher_f_pdf_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::fisher_f_distribution(1, 1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_fisher_f_pdf_nvrtc_double.cpp b/test/test_fisher_f_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..8aa1482aae --- /dev/null +++ b/test/test_fisher_f_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_fisher_f_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::fisher_f_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_fisher_f_pdf_nvrtc_float.cpp b/test/test_fisher_f_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..e461dea9af --- /dev/null +++ b/test/test_fisher_f_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_fisher_f_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::fisher_f_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_fisher_f_quan_double.cu b/test/test_fisher_f_quan_double.cu new file mode 100644 index 0000000000..c16eb2a952 --- /dev/null +++ b/test/test_fisher_f_quan_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::fisher_f_distribution(1, 1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_fisher_f_quan_float.cu b/test/test_fisher_f_quan_float.cu new file mode 100644 index 0000000000..85cf479670 --- /dev/null +++ b/test/test_fisher_f_quan_float.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::fisher_f_distribution(1, 1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::fisher_f_distribution(1, 1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_fisher_f_quan_nvrtc_double.cpp b/test/test_fisher_f_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..16ad0cbc03 --- /dev/null +++ b/test/test_fisher_f_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/fisher_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/fisher_f.hpp>
+extern "C" __global__
+void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_fisher_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_fisher_f_quan_nvrtc_float.cpp b/test/test_fisher_f_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..377048e526
--- /dev/null
+++ b/test/test_fisher_f_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/fisher_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/fisher_f.hpp>
+extern "C" __global__
+void test_fisher_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_fisher_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_fisher_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_fisher_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::fisher_f_distribution<float_type>(1, 1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_float_io.cpp b/test/test_float_io.cpp
index 107cc39d4a..4c1ee1760a 100644
--- a/test/test_float_io.cpp
+++ b/test/test_float_io.cpp
@@ -301,7 +301,7 @@ void test()
       std::ios_base::fixed | std::ios_base::showpos}};
 
    std::array<std::pair<std::string, std::string>, 40> string_data = {{
-#include "libs/math/test/string_data.ipp"
+#include "string_data.ipp"
   }};
 
    double num = 123456789.0;
@@ -384,10 +384,10 @@ T generate_random()
      val += gen();
   }
   e_type e;
-  val = frexp(val, &e);
+  val = std::frexp(val, &e);
   static boost::random::uniform_int_distribution<e_type> ui(0, std::numeric_limits<T>::max_exponent - 10);
-  return ldexp(val, ui(gen));
+  return std::ldexp(val, ui(gen));
 }
 
 template <class T>
diff --git a/test/test_fpclassify_nvrtc_double.cpp b/test/test_fpclassify_nvrtc_double.cpp
new file mode 100644
index 0000000000..0a99ddaa8e
--- /dev/null
+++ b/test/test_fpclassify_nvrtc_double.cpp
@@ -0,0 +1,198 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::isnan(in1[i]) +
+                 boost::math::isinf(in1[i]) +
+                 boost::math::isfinite(in1[i]) +
+                 boost::math::isnormal(in1[i]) +
+                 boost::math::fpclassify(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::isnan(h_in1[i]) +
+                       boost::math::isinf(h_in1[i]) +
+                       boost::math::isfinite(h_in1[i]) +
+                       boost::math::isnormal(h_in1[i]) +
+                       boost::math::fpclassify(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_fpclassify_nvrtc_float.cpp b/test/test_fpclassify_nvrtc_float.cpp
new file mode 100644
index 0000000000..04416e77d2
--- /dev/null
+++ b/test/test_fpclassify_nvrtc_float.cpp
@@ -0,0 +1,198 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::isnan(in1[i]) +
+                 boost::math::isinf(in1[i]) +
+                 boost::math::isfinite(in1[i]) +
+                 boost::math::isnormal(in1[i]) +
+                 boost::math::fpclassify(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::isnan(h_in1[i]) +
+                       boost::math::isinf(h_in1[i]) +
+                       boost::math::isfinite(h_in1[i]) +
+                       boost::math::isnormal(h_in1[i]) +
+                       boost::math::fpclassify(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_gamma.cpp b/test/test_gamma.cpp
index 6b2d19cca6..fb86080d72 100644
--- a/test/test_gamma.cpp
+++ b/test/test_gamma.cpp
@@ -3,7 +3,12 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#include <boost/math/special_functions/gamma.hpp>
 
 #include "test_gamma.hpp"
 //
diff --git a/test/test_gamma.hpp b/test/test_gamma.hpp
index c21573dac6..7376573ad2 100644
--- a/test/test_gamma.hpp
+++ b/test/test_gamma.hpp
@@ -4,8 +4,7 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
-
+#include <boost/math/tools/config.hpp>
 #include <boost/math/concepts/real_concept.hpp>
 #define BOOST_TEST_MAIN
@@ -13,7 +12,7 @@
 #include <boost/test/unit_test.hpp>
 #include <boost/math/tools/traits.hpp> // for has_denorm_now
 #include <boost/test/tools/floating_point_comparison.hpp>
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/special_functions/gamma.hpp>
 #include <boost/math/constants/constants.hpp>
 #include <boost/array.hpp>
@@ -320,11 +319,13 @@ void test_spots(T, const char* name)
       BOOST_CHECK(sign == -1);
    }
 
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
    if(boost::math::detail::has_denorm_now<T>() && std::numeric_limits<T>::has_infinity && (boost::math::isinf)(1 / std::numeric_limits<T>::denorm_min()))
    {
      BOOST_CHECK_EQUAL(boost::math::tgamma(-std::numeric_limits<T>::denorm_min()), -std::numeric_limits<T>::infinity());
     BOOST_CHECK_EQUAL(boost::math::tgamma(std::numeric_limits<T>::denorm_min()), std::numeric_limits<T>::infinity());
   }
+   #endif
    //
    // Extra large values for lgamma, see https://github.com/boostorg/math/issues/242
    //
diff --git a/test/test_gamma_dist.cpp b/test/test_gamma_dist.cpp
index b7776c79cb..2b1a181f33 100644
--- a/test/test_gamma_dist.cpp
+++ b/test/test_gamma_dist.cpp
@@ -15,16 +15,23 @@
 // From MathWorld--A Wolfram Web Resource.
 // http://mathworld.wolfram.com/GammaDistribution.html
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch.hpp> // include directory libs/math/src/tr1/ is needed.
+#endif
+
+#include <boost/math/tools/config.hpp>
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 
 #include <boost/math/distributions/gamma.hpp>
     using boost::math::gamma_distribution;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include "test_out_of_range.hpp"
 
 #include <iostream>
diff --git a/test/test_gamma_dist_cdf_double.cu b/test/test_gamma_dist_cdf_double.cu
new file mode 100644
index 0000000000..4777196aa1
--- /dev/null
+++ b/test/test_gamma_dist_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_gamma_dist_cdf_float.cu b/test/test_gamma_dist_cdf_float.cu
new file mode 100644
index 0000000000..a93aca3950
--- /dev/null
+++ b/test/test_gamma_dist_cdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_gamma_dist_cdf_nvrtc_double.cpp b/test/test_gamma_dist_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..3e911f4e05
--- /dev/null
+++ b/test/test_gamma_dist_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/gamma.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::gamma_distribution<float_type>(1, 1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_gamma_dist_cdf_nvrtc_float.cpp b/test/test_gamma_dist_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..17762d4066
--- /dev/null
+++ b/test/test_gamma_dist_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/gamma.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::gamma_distribution<float_type>(1, 1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_gamma_dist_pdf_double.cu b/test/test_gamma_dist_pdf_double.cu
new file mode 100644
index 0000000000..a8411d5b6d
--- /dev/null
+++ b/test/test_gamma_dist_pdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_gamma_dist_pdf_float.cu b/test/test_gamma_dist_pdf_float.cu
new file mode 100644
index 0000000000..6ab3247acb
--- /dev/null
+++ b/test/test_gamma_dist_pdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_gamma_dist_pdf_nvrtc_double.cpp b/test/test_gamma_dist_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..1faae99866
--- /dev/null
+++ b/test/test_gamma_dist_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/gamma.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::gamma_distribution<float_type>(1, 1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_gamma_dist_pdf_nvrtc_float.cpp b/test/test_gamma_dist_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..054ddbbadc
--- /dev/null
+++ b/test/test_gamma_dist_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::gamma_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = 
static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::gamma_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_dist_quan_double.cu b/test/test_gamma_dist_quan_double.cu new file mode 100644 index 0000000000..d29bf6d6be --- /dev/null +++ b/test/test_gamma_dist_quan_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <exception>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vector with uniform random variates
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Compute the same quantiles serially to verify the device results
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
+        double t = w.elapsed();
+
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
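The timing in these .cu tests comes from the test-local header "stopwatch.hpp", which is not part of this diff. Judging only from the usage above (watch w; w.elapsed(); w.reset();), a minimal stand-in could look like the sketch below; the interface is inferred from the call sites, and the shipped header may differ.

#include <chrono>

// Hypothetical stand-in for the `watch` helper used by the CUDA tests:
// starts timing on construction, reports elapsed seconds as a double,
// and restarts the clock on reset().
class watch
{
    std::chrono::steady_clock::time_point start_ = std::chrono::steady_clock::now();

public:
    double elapsed() const
    {
        return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
    }

    void reset() { start_ = std::chrono::steady_clock::now(); }
};

diff --git a/test/test_gamma_dist_quan_float.cu b/test/test_gamma_dist_quan_float.cu
new file mode 100644
index 0000000000..58aa42e90f
--- /dev/null
+++ b/test/test_gamma_dist_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file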
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <exception>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/distributions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::gamma_distribution<float_type>(1, 1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vector with uniform random variates
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Compute the same quantiles serially to verify the device results
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::gamma_distribution<float_type>(1, 1), input_vector1[i]));
+        double t = w.elapsed();
+
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_gamma_dist_quan_nvrtc_double.cpp b/test/test_gamma_dist_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..132efcd6c1
--- /dev/null
+++ b/test/test_gamma_dist_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::gamma_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::gamma_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_dist_quan_nvrtc_float.cpp b/test/test_gamma_dist_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..7749523abc --- /dev/null +++ b/test/test_gamma_dist_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::gamma_distribution(1, 1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::gamma_distribution(1, 1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_nvrtc_double.cpp b/test/test_gamma_nvrtc_double.cpp new file mode 100644 index 0000000000..9fe2933720 --- /dev/null +++ b/test/test_gamma_nvrtc_double.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +extern "C" __global__ +void test_gamma_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::tgamma(in1[i]) + boost::math::lgamma(in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + 
checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::tgamma(h_in1[i]) + boost::math::lgamma(h_in2[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_nvrtc_float.cpp b/test/test_gamma_nvrtc_float.cpp new file mode 100644 index 0000000000..5d34b130ad --- /dev/null +++ b/test/test_gamma_nvrtc_float.cpp @@ -0,0 +1,186 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +extern "C" __global__ +void test_gamma_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::tgamma(in1[i]) + boost::math::lgamma(in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/math/boost-root/libs/math/include/"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, 
numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = boost::math::tgamma(h_in1[i]) + boost::math::lgamma(h_in2[i]); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_p_derivative_double.cu b/test/test_gamma_p_derivative_double.cu new file mode 100644 index 0000000000..566bc1657f --- /dev/null +++ b/test/test_gamma_p_derivative_double.cu @@ -0,0 +1,104 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::gamma_p_derivative(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand() / (float_type)RAND_MAX;
+        input_vector2[i] = rand() / (float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Compute the same values serially to verify the device results
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::gamma_p_derivative(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+
+    // Check the results
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
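The inputs and outputs above live in CUDA unified memory via the test-local "cuda_managed_ptr.hpp", which this diff does not include. From the usage alone (construction with an element count, get() for the raw pointer, operator[] from host code), a plausible minimal sketch over cudaMallocManaged follows; the interface is an assumption inferred from the call sites, not the actual header.

#include <cuda_runtime.h>
#include <cstddef>

// Assumed shape of cuda_managed_ptr<T>: a non-copyable owner of
// cudaMallocManaged storage that host code can index directly and
// device code can receive as a raw pointer via get().
template <typename T>
class cuda_managed_ptr
{
    T* data_ = nullptr;

public:
    explicit cuda_managed_ptr(std::size_t n) { cudaMallocManaged(&data_, n * sizeof(T)); }
    ~cuda_managed_ptr() { cudaFree(data_); }

    cuda_managed_ptr(const cuda_managed_ptr&) = delete;
    cuda_managed_ptr& operator=(const cuda_managed_ptr&) = delete;

    T* get() const { return data_; }
    T& operator[](std::size_t i) { return data_[i]; }
    const T& operator[](std::size_t i) const { return data_[i]; }
};

diff --git a/test/test_gamma_p_derivative_float.cu b/test/test_gamma_p_derivative_float.cu
new file mode 100644
index 0000000000..f9fd52a50c
--- /dev/null
+++ b/test/test_gamma_p_derivative_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file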
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::gamma_p_derivative(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand() / (float_type)RAND_MAX;
+        input_vector2[i] = rand() / (float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Compute the same values serially to verify the device results
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::gamma_p_derivative(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+
+    // Check the results
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_gamma_p_derivative_nvrtc_double.cpp b/test/test_gamma_p_derivative_nvrtc_double.cpp
new file mode 100644
index 0000000000..53a752c2df
--- /dev/null
+++ b/test/test_gamma_p_derivative_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_gamma_p_derivative_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::gamma_p_derivative(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_p_derivative_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_p_derivative_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_p_derivative_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + 
h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::gamma_p_derivative(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_p_derivative_nvrtc_float.cpp b/test/test_gamma_p_derivative_nvrtc_float.cpp new file mode 100644 index 0000000000..da9c50855b --- /dev/null +++ b/test/test_gamma_p_derivative_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_gamma_p_derivative_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::gamma_p_derivative(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_p_derivative_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_p_derivative_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_p_derivative_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + 
h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::gamma_p_derivative(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gamma_p_inv_double.cu b/test/test_gamma_p_inv_double.cu new file mode 100644 index 0000000000..4392f37d38 --- /dev/null +++ b/test/test_gamma_p_inv_double.cu @@ -0,0 +1,108 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <random>
+#include <cstdlib>
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::gamma_p_inv(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    std::mt19937_64 gen(42);
+    std::uniform_real_distribution<float_type> dist(0, 1);
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+        input_vector2[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Compute the same values serially to verify the device results
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::gamma_p_inv(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+
+    // Check the results
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 1000)
+        {
+            std::cerr << "Result verification failed at element " << i << "!\n"
+                      << "Error found was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_gamma_p_inv_float.cu b/test/test_gamma_p_inv_float.cu
new file mode 100644
index 0000000000..70033686c1
--- /dev/null
+++ b/test/test_gamma_p_inv_float.cu
@@ -0,0 +1,107 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
diff --git a/test/test_gamma_p_inv_float.cu b/test/test_gamma_p_inv_float.cu
new file mode 100644
index 0000000000..70033686c1
--- /dev/null
+++ b/test/test_gamma_p_inv_float.cu
@@ -0,0 +1,107 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::gamma_p_inv(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    std::mt19937_64 gen(42);
+    std::uniform_real_distribution<float_type> dist(0, 1);
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+        input_vector2[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::gamma_p_inv(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
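The .cu tests above launch through the runtime API's triple-chevron syntax, while the NVRTC tests that follow compile the kernel source at run time and launch the resulting PTX through the driver API. The two launches are equivalent; the driver call simply spells out the grid and block dimensions and passes the kernel arguments as an array of pointers. A sketch of the correspondence, using the names that appear in the tests below:

    // Runtime API (.cu tests):
    //     cuda_test<<<blocksPerGrid, threadsPerBlock>>>(in1, in2, out, numElements);
    // Driver API (NVRTC tests), with 'kernel' obtained from cuModuleGetFunction:
    void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
    cuLaunchKernel(kernel,
                   numBlocks, 1, 1,   // grid dimensions
                   blockSize, 1, 1,   // block dimensions
                   0,                 // dynamic shared memory (bytes)
                   nullptr,           // default stream
                   args, nullptr);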
diff --git a/test/test_gamma_p_inv_nvrtc_double.cpp b/test/test_gamma_p_inv_nvrtc_double.cpp
new file mode 100644
index 0000000000..d270dbf901
--- /dev/null
+++ b/test/test_gamma_p_inv_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/gamma.hpp>
+extern "C" __global__
+void test_gamma_p_inv_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::gamma_p_inv(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_p_inv_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_p_inv_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_p_inv_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::gamma_p_inv(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_gamma_p_inv_nvrtc_float.cpp b/test/test_gamma_p_inv_nvrtc_float.cpp
new file mode 100644
index 0000000000..7c844eb682
--- /dev/null
+++ b/test/test_gamma_p_inv_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_gamma_p_inv_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::gamma_p_inv(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_p_inv_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gamma_p_inv_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_p_inv_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::gamma_p_inv(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gegenbauer_double.cu b/test/test_gegenbauer_double.cu new file mode 100644 index 0000000000..21278d7a82 --- /dev/null +++ b/test/test_gegenbauer_double.cu @@ -0,0 +1,125 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Gegenbauer prime uses all methods internally so it's the easy choice
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <vector>
+#include <boost/math/special_functions/gegenbauer.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::gegenbauer_prime(2, in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.resize(numElements); // resize (not reserve) so the indexed assignment below is well defined
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results[i] = boost::math::gegenbauer_prime(2, input_vector1[i], input_vector2[i]);
+    double t = w.elapsed();
+    // check the results
+    int failure_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
+            // Most elements are under 50 but extremely small numbers vary more greatly
+            if (eps > 1000)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i]
+                          << "\n  Host: " << results[i]
+                          << "\n   Eps: " << eps << std::endl;
+                ++failure_counter;
+                if (failure_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (failure_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
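The watch object used for timing above comes from the stopwatch.hpp test helper. A std::chrono-based equivalent (an assumption about its shape, shown only so the reset()/elapsed() calls are readable) might look like:

    // Hypothetical sketch of the 'watch' stopwatch helper -- the real header may differ.
    #include <chrono>

    class watch_sketch
    {
        std::chrono::steady_clock::time_point start_ = std::chrono::steady_clock::now();
    public:
        void reset() { start_ = std::chrono::steady_clock::now(); }
        double elapsed() const // seconds since construction or the last reset()
        {
            return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
        }
    };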
diff --git a/test/test_gegenbauer_float.cu b/test/test_gegenbauer_float.cu
new file mode 100644
index 0000000000..b7affaecd4
--- /dev/null
+++ b/test/test_gegenbauer_float.cu
@@ -0,0 +1,124 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+// Gegenbauer prime uses all methods internally so it's the easy choice
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <vector>
+#include <boost/math/special_functions/gegenbauer.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::gegenbauer_prime(2, in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.resize(numElements); // resize (not reserve) so the indexed assignment below is well defined
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results[i] = boost::math::gegenbauer_prime(2, input_vector1[i], input_vector2[i]);
+    double t = w.elapsed();
+    // check the results
+    int failure_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
+            if (eps > 1000)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i]
+                          << "\n  Host: " << results[i]
+                          << "\n   Eps: " << eps << std::endl;
+                ++failure_counter;
+                if (failure_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (failure_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_gegenbauer_nvrtc_double.cpp b/test/test_gegenbauer_nvrtc_double.cpp
new file mode 100644
index 0000000000..0c8416cb61
--- /dev/null
+++ b/test/test_gegenbauer_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_gegenbauer_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::gegenbauer(2, in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gegenbauer_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gegenbauer_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gegenbauer_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::gegenbauer(2, h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_gegenbauer_nvrtc_float.cpp b/test/test_gegenbauer_nvrtc_float.cpp new file mode 100644 index 0000000000..c0d3484175 --- /dev/null +++ b/test/test_gegenbauer_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_gegenbauer_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::gegenbauer(2, in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gegenbauer_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_gegenbauer_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_gegenbauer_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::gegenbauer(2, h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_geometric.cpp b/test/test_geometric.cpp
index 928a2aa0ed..13a9e090b9 100644
--- a/test/test_geometric.cpp
+++ b/test/test_geometric.cpp
@@ -26,9 +26,14 @@
 # define TEST_REAL_CONCEPT
 #endif
 
-#include <boost/math/tools/test.hpp>
+#include <boost/math/tools/config.hpp>
+
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 using ::boost::math::concepts::real_concept;
+#endif
 
 #include <boost/math/distributions/geometric.hpp> // for geometric_distribution
 using boost::math::geometric_distribution;
@@ -64,7 +69,11 @@ void test_spot( // Test a single spot value against 'known good' values.
       RealType tol,     // Test tolerance
       RealType logtol)  // Logcdf Test tolerance.
 {
-  BOOST_IF_CONSTEXPR (std::is_same<RealType, long double>::value || std::is_same<RealType, real_concept>::value)
+  BOOST_IF_CONSTEXPR (std::is_same<RealType, long double>::value
+                      #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
+                      || std::is_same<RealType, real_concept>::value
+                      #endif
+                      )
   {
     logtol *= 100;
   }
@@ -376,7 +385,9 @@ if(std::numeric_limits<RealType>::is_specialized)
      static_cast<RealType>(9.9000000000003448e-201L), //
      100 * tolerance); // Note difference
 
-    // p nearer unity.
+    // p nearer unity.
+    // On GPU this gets flushed to 0 which has an eps difference of 3.4e+38
+    #ifndef BOOST_MATH_HAS_GPU_SUPPORT
     BOOST_CHECK_CLOSE_FRACTION( //
       pdf(geometric_distribution<RealType>(static_cast<RealType>(0.9999)),
       static_cast<RealType>(10) ),  // Number of failures, k
      // static_cast<RealType>(1.00156406e-040)
      static_cast<RealType>(9.999e-41), // exact from 100 digit calculator.
      2e3 * tolerance); // Note bigger tolerance needed.
+    #endif
 
     // Moshier Cephes 100 digits calculator says 9.999e-41
     //0.9999*pow(1-0.9999,10)
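The guard added above exists because 9.999e-41 is subnormal in single precision, and GPU builds commonly flush subnormals to zero, at which point a ULP-style comparison reports an enormous distance (the comment in the diff measured roughly 3.4e+38 eps). A quick standalone host-side illustration of the failure mode (not part of the test suite):

    #include <boost/math/special_functions/relative_difference.hpp>
    #include <iostream>

    int main()
    {
        float exact   = 9.999e-41f; // subnormal as a float
        float flushed = 0.0f;       // what a flush-to-zero device returns
        // The reported ULP distance between a subnormal and zero is huge,
        // which is why the spot check is compiled out on GPU builds.
        std::cout << boost::math::epsilon_difference(exact, flushed) << std::endl;
    }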
diff --git a/test/test_geometric_dist_cdf_double.cu b/test/test_geometric_dist_cdf_double.cu
new file mode 100644
index 0000000000..98b6510ad1
--- /dev/null
+++ b/test/test_geometric_dist_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_geometric_dist_cdf_float.cu b/test/test_geometric_dist_cdf_float.cu
new file mode 100644
index 0000000000..2662ac07c5
--- /dev/null
+++ b/test/test_geometric_dist_cdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!"
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/test/test_geometric_dist_cdf_nvrtc_double.cpp b/test/test_geometric_dist_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..f8c5ed5aad --- /dev/null +++ b/test/test_geometric_dist_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::geometric_distribution(0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_geometric_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX 
from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::geometric_distribution(0.5), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_geometric_dist_cdf_nvrtc_float.cpp b/test/test_geometric_dist_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..a53cd0d972 --- /dev/null +++ b/test/test_geometric_dist_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::geometric_distribution(0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_geometric_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::geometric_distribution(0.5), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_geometric_dist_pdf_double.cu b/test/test_geometric_dist_pdf_double.cu new file mode 100644 index 0000000000..03d2dc0078 --- /dev/null +++ b/test/test_geometric_dist_pdf_double.cu @@ -0,0 +1,109 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
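The geometric pdf exercised here has the closed form p(1-p)^k (the probability of k failures before the first success), so the host-side reference loop is easy to sanity-check by hand; for example p = 0.5, k = 3 gives 0.0625. A quick standalone check (not part of the patch):

    #include <boost/math/distributions/geometric.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        double p = 0.5, k = 3;
        double by_hand = p * std::pow(1.0 - p, k);                         // 0.0625
        double by_lib  = pdf(boost::math::geometric_distribution<>(p), k); // matches
        std::cout << by_hand << ' ' << by_lib << std::endl;
    }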
diff --git a/test/test_geometric_dist_pdf_float.cu b/test/test_geometric_dist_pdf_float.cu
new file mode 100644
index 0000000000..1034d122b5
--- /dev/null
+++ b/test/test_geometric_dist_pdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_geometric_dist_pdf_nvrtc_double.cpp b/test/test_geometric_dist_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..8a6b5756e6
--- /dev/null
+++ b/test/test_geometric_dist_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_geometric_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_geometric_dist_pdf_nvrtc_float.cpp b/test/test_geometric_dist_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..dfb05105dd
--- /dev/null
+++ b/test/test_geometric_dist_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_geometric_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_geometric_dist_quan_double.cu b/test/test_geometric_dist_quan_double.cu
new file mode 100644
index 0000000000..fcac938e5a
--- /dev/null
+++ b/test/test_geometric_dist_quan_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vector
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the geometric distribution quantile CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_geometric_dist_quan_float.cu b/test/test_geometric_dist_quan_float.cu
new file mode 100644
index 0000000000..0749522021
--- /dev/null
+++ b/test/test_geometric_dist_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vector
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the geometric distribution quantile CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch geometric distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::geometric_distribution<float_type>(0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 1000.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_geometric_dist_quan_nvrtc_double.cpp b/test/test_geometric_dist_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..52b2e97ec4
--- /dev/null
+++ b/test/test_geometric_dist_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_geometric_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_geometric_dist_quan_nvrtc_float.cpp b/test/test_geometric_dist_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..a83cf857e8
--- /dev/null
+++ b/test/test_geometric_dist_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/geometric.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_geometric_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::geometric_distribution<float_type>(0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_geometric_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_geometric_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_geometric_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::geometric_distribution<float_type>(0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_hankel.cpp b/test/test_hankel.cpp
index f8bd173da8..a93e90c4d1 100644
--- a/test/test_hankel.cpp
+++ b/test/test_hankel.cpp
@@ -3,9 +3,13 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
 #define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#include <boost/math/tools/config.hpp>
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
@@ -85,6 +89,7 @@ void test_hankel(T, const char* name)
 //
 // Instantiate a few instances to check our error handling code can cope with std::complex:
 //
+#ifndef SYCL_LANGUAGE_VERSION
 typedef boost::math::policies::policy<
    boost::math::policies::overflow_error<boost::math::policies::ignore_error>,
    boost::math::policies::denorm_error<boost::math::policies::ignore_error>,
@@ -120,7 +125,7 @@ typedef boost::math::policies::policy<
    boost::math::policies::indeterminate_result_error<boost::math::policies::ignore_error>
 > pol3;
 template std::complex<double> boost::math::cyl_hankel_1<double, double, pol3>(double, double, const pol3&);
-
+#endif
 
 BOOST_AUTO_TEST_CASE( test_main )
 {
diff --git a/test/test_hermite.cpp b/test/test_hermite.cpp
index d1127feec2..60dafdb8f1 100644
--- a/test/test_hermite.cpp
+++ b/test/test_hermite.cpp
@@ -5,8 +5,15 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
-#include"test_hermite.hpp"
+#endif
+
+#ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#endif
+
+#include "test_hermite.hpp"
 
 //
 // DESCRIPTION:
diff --git a/test/test_hermite.hpp b/test/test_hermite.hpp
index 0b00677eec..8f7c55ff10 100644
--- a/test/test_hermite.hpp
+++ b/test/test_hermite.hpp
@@ -11,11 +11,17 @@
 // Constants are too big for float case, but this doesn't matter for test.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp>
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
 #include <boost/math/constants/constants.hpp>
 #include <boost/array.hpp>
 #include "functor.hpp"
diff --git a/test/test_hermite_double.cu b/test/test_hermite_double.cu
new file mode 100644
index 0000000000..a53766171a
--- /dev/null
+++ b/test/test_hermite_double.cu
@@ -0,0 +1,120 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cmath>
+#include <cstdlib>
+#include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::hermite(1U, in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Hermite polynomial CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::hermite(1U, input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    int fail_counter = 0;
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 1000)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i] << '\n'
+                          << "  Host: " << results[i] << '\n'
+                          << "   Eps: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                fail_counter++;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (fail_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_hermite_float.cu b/test/test_hermite_float.cu
new file mode 100644
index 0000000000..c48560bbe5
--- /dev/null
+++ b/test/test_hermite_float.cu
@@ -0,0 +1,120 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cmath>
+#include <cstdlib>
+#include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::hermite(1U, in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Hermite polynomial CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::hermite(1U, input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    int fail_counter = 0;
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 1000)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i] << '\n'
+                          << "  Host: " << results[i] << '\n'
+                          << "   Eps: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                fail_counter++;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (fail_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_hermite_nvrtc_double.cpp b/test/test_hermite_nvrtc_double.cpp
new file mode 100644
index 0000000000..569d975cb6
--- /dev/null
+++ b/test/test_hermite_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cmath>
+#include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_hermite_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::hermite(1U, in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_hermite_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_hermite_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
{"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_hermite_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::hermite(1U, h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_hermite_nvrtc_float.cpp b/test/test_hermite_nvrtc_float.cpp new file mode 100644 index 0000000000..e2e907c519 --- /dev/null +++ b/test/test_hermite_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. 
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cmath>
+#include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/hermite.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_hermite_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::hermite(1U, in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_hermite_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_hermite_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_hermite_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::hermite(1U, h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_heuman_lambda.cpp b/test/test_heuman_lambda.cpp
index 83709c635b..cdcf39aa68 100644
--- a/test/test_heuman_lambda.cpp
+++ b/test/test_heuman_lambda.cpp
@@ -4,7 +4,10 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
 #include "test_heuman_lambda.hpp"
 
 //
diff --git a/test/test_heuman_lambda.hpp b/test/test_heuman_lambda.hpp
index 23720b2d02..6081dac482 100644
--- a/test/test_heuman_lambda.hpp
+++ b/test/test_heuman_lambda.hpp
@@ -8,11 +8,17 @@
 // Constants are too big for float case, but this doesn't matter for test.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp>
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/special_functions/heuman_lambda.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
 #include <boost/math/constants/constants.hpp>
 #include <boost/array.hpp>
 #include "functor.hpp"
diff --git a/test/test_heuman_lambda_double.cu b/test/test_heuman_lambda_double.cu
new file mode 100644
index 0000000000..361dbe8051
--- /dev/null
+++ b/test/test_heuman_lambda_double.cu
@@ -0,0 +1,120 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cmath>
+#include <cstdlib>
+#include <boost/math/special_functions/heuman_lambda.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::heuman_lambda(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Heuman lambda CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::heuman_lambda(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    int fail_counter = 0;
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 200)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i] << '\n'
+                          << "  Host: " << results[i] << '\n'
+                          << "   Eps: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                fail_counter++;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (fail_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_heuman_lambda_float.cu b/test/test_heuman_lambda_float.cu
new file mode 100644
index 0000000000..361dbe8051
--- /dev/null
+++ b/test/test_heuman_lambda_float.cu
@@ -0,0 +1,120 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <cmath>
+#include <cstdlib>
+#include <boost/math/special_functions/heuman_lambda.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::heuman_lambda(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Heuman lambda CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::heuman_lambda(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    int fail_counter = 0;
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 200)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i] << '\n'
+                          << "  Host: " << results[i] << '\n'
+                          << "   Eps: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                fail_counter++;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (fail_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_heumann_lambda_nvrtc_double.cpp b/test/test_heumann_lambda_nvrtc_double.cpp
new file mode 100644
index 0000000000..38c762fd51
--- /dev/null
+++ b/test/test_heumann_lambda_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/heuman_lambda.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/heuman_lambda.hpp>
+extern "C" __global__
+void test_heuman_lambda_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::heuman_lambda(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_heuman_lambda_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_heuman_lambda_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_heuman_lambda_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] =
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::heuman_lambda(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_heumann_lambda_nvrtc_float.cpp b/test/test_heumann_lambda_nvrtc_float.cpp new file mode 100644 index 0000000000..5139b9d6f6 --- /dev/null +++ b/test/test_heumann_lambda_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/heuman_lambda.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/heuman_lambda.hpp>
+extern "C" __global__
+void test_heuman_lambda_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::heuman_lambda(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_heuman_lambda_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_heuman_lambda_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_heuman_lambda_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] =
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::heuman_lambda(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_holtsmark.cpp b/test/test_holtsmark.cpp new file mode 100644 index 0000000000..93a40924d6 --- /dev/null +++ b/test/test_holtsmark.cpp @@ -0,0 +1,917 @@ +// Copyright Takuma Yoshimura 2024. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_TEST_MAIN
+#define BOOST_TEST_MODULE StatsHoltsmarkTest
+#include <boost/test/included/unit_test.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/tools/big_constant.hpp>
+
+#include <iostream>
+
+#if __has_include(<stdfloat>)
+# include <stdfloat>
+#endif
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
+using boost::math::holtsmark_distribution;
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+#include <boost/multiprecision/cpp_bin_float.hpp>
+using boost::multiprecision::cpp_bin_float_quad;
+#endif
+
+template <typename RealType, int N>
+void do_test_holtsmark_pdf(){
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy [%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36729417918039395222067998266923903487897550760740e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65389736963758327689008908803579458127136270822821e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.02515191704410688567167143509210415364664018836038e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51083986231955529936787758130352472694082331202869e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.15094236163249353135030241188004077293096105502542e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.99750209903363198419241505065146206315152726747464e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.11488945306717663129360225856869217115733169200098e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.57635410598104651856363821355027691095093972951943e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.45396231261375200568114750897618690566092315194568e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.56644599840900478087175884712634478003230341866094e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.07991505579130717014680432847812811882295188855215e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21504440259916207727077397273468920426729181666284e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36133628073378183373326886775069575640127303211029e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51745548085348400860371488668500734429223868343929e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.68134487107062900924723590620591092812119992658420e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85018067925573560771430043931430243630326746823000e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1)),
BOOST_MATH_BIG_CONSTANT(RealType, N, 2.02038159607840130388931544845552929991729709746772e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.10468477092312109723487937526691724501188944561469e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18768434960425041116444711570747254236793293668156e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.26876533945661456653252880545328465490361724757011e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.34729279420630671501163324236373491976559833675371e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42262031889641529582832839126322520342094549884605e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.49409942058625893002692983261905908575520500298169e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56108951988457608160176526934999395388199046235972e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.62296840354090035789597147663858548813023471783846e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.67914286936215258107106284322230987052088197806732e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72905929336501384188469491706325278582273134014420e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.77221383677408177290219591627664575751370295766362e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.80816200831657887972174538033323818453141437738815e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83652730587590691770044766018047580182775092180058e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.85700868106012048340895194764008089858950037497619e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.86938660017923959009561433879636172948429661600328e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.87352751452164445024482162286994868261727837966217e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.86938660017923959009561433879636172948429661600328e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.85700868106012048340895194764008089858950037497619e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83652730587590691770044766018047580182775092180058e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.80816200831657887972174538033323818453141437738815e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.77221383677408177290219591627664575751370295766362e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
2.72905929336501384188469491706325278582273134014420e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.67914286936215258107106284322230987052088197806732e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.62296840354090035789597147663858548813023471783846e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56108951988457608160176526934999395388199046235972e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.49409942058625893002692983261905908575520500298169e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42262031889641529582832839126322520342094549884605e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.34729279420630671501163324236373491976559833675371e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.26876533945661456653252880545328465490361724757011e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18768434960425041116444711570747254236793293668156e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.10468477092312109723487937526691724501188944561469e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.02038159607840130388931544845552929991729709746772e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85018067925573560771430043931430243630326746823000e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.68134487107062900924723590620591092812119992658420e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51745548085348400860371488668500734429223868343929e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36133628073378183373326886775069575640127303211029e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21504440259916207727077397273468920426729181666284e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.07991505579130717014680432847812811882295188855215e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.56644599840900478087175884712634478003230341866094e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.45396231261375200568114750897618690566092315194568e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.57635410598104651856363821355027691095093972951943e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.11488945306717663129360225856869217115733169200098e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.99750209903363198419241505065146206315152726747464e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.15094236163249353135030241188004077293096105502542e-2), tolerance); + 
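+    // Sanity anchor for the spot values above: the standard Holtsmark
+    // distribution is the symmetric alpha-stable law with alpha = 3/2, whose
+    // density at the mode is pdf(0) = Gamma(5/3) / pi ~= 2.8735275145e-1,
+    // matching the x = 0 entry checked earlier.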
BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51083986231955529936787758130352472694082331202869e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.02515191704410688567167143509210415364664018836038e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65389736963758327689008908803579458127136270822821e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36729417918039395222067998266923903487897550760740e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.66754576694881156512310862711445437434536539665220e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.11173604765480684115169149814306652067264621028219e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.40661030133408839114013026681116038222350468593972e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22346322254737897976061662951210609505224644527458e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.37441612177611972649583292419049409747033180591176e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.74744460065068339118467750880557414156817094839507e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.27304026309950351561235691054710976330201642992015e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90649774685568282389553481307707005424869182652166e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(9)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38767874243521145729361398147898730301527980427894e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(10)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04777602492944046116536405570429208835346612226751e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.14427312838534546510639939284636768866987456541955e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.48125490715334110982111302156190976484065179596964e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.25926848543648125740563360613612653857829343973075e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.33819608589296696976594994753563070519825392999328e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.62882342819480981578669644070664737082208227167328e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.07231582988207590928480356376941073734041404814646e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.27006701837892764913571864450685488819589225207004e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73366906892470965030093227280098921046549874789447e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.35957320021440204694920969306620767773792096574145e-4), 
tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08962051920666002556891360954800856160496745325982e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.89257005093845424113694708405490444536741708307432e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.36985573727704971748849427466806366268717813920884e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.18908057108330090847508167263164576897080536147998e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.25741312407933720816582583160953651639222885045617e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.90521803068812235229266578780465914597024834788499e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.99440098605222381975051163529379005706849379136631e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.35556983515831713649398537010595127693244595946631e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.89252591391344813399503397931059007207167187335384e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.54761819718387004182264813450582914982582239574760e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28473144505453904388319480756963924103363111590946e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08038477619830474574170723303117982155643976342020e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.18821097500148811906668422616947735567544539343910e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.83770756909753597744679176403606964363170824955403e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.25030682765210472090258978149127752580598842866547e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.13470581474698689445958562292807701026370826971490e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.32481553840568432886657947564022812389544240910245e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72079249234488105623103527050493983216840056389979e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25993583492605042781181768463472827114252734385354e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90139785097711606987697985776461422964227374409867e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61772178749879746562419160426660201168856327635754e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.85568203412051586615998003517909621954036193765824e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(512)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
5.04563780918059698346140819518179336362098891429611e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1024)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.91792339891869109849236922708731694322153177924824e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.57638176923363231335450711092364627257129748508825e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.78661356398095539721893167504764083609118624639856e-10), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.92604459926540711296353370623384343787650705186643e-11), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.70807462044367666228538283653300633948804226008283e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.53938313936621297172285881159282265471330118114236e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72126969520581558576219170420297785383800313247716e-13), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 17)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.81057004571925961169117504945825287978291651680376e-14), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.50396638201843310535816082381660912483279969324341e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 19)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.50330305081854272655464104542050109297998338121296e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65748943908780839905864199570025333227786061957216e-16), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 21)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.69782199924985143903444545973473854205301886566784e-17), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.30465447555059221873312238790689561215989143051468e-18), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 23)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.46806937341565301230797169380667244412810953871153e-18), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.59520452276585659227559942516968833301265805493281e-19), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.58771679139660099100514647692148711295768774585921e-20), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.11001413331373018059574227843565648889473010676660e-21), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 27)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43366149729083958029796238740037446793377590904146e-21), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.53437941664766678806680232945039585344994407777604e-22), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 29)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.48019217902582750507529988918628139079404963189635e-23), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.91993567701892984811136675864172386794777828712695e-24), tolerance); + 
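+    // The ldexp(1, k) arguments probe the far power-law tail: for an
+    // alpha-stable law with alpha = 3/2, pdf(x) ~ C * x^(-5/2) as x -> inf,
+    // so each doubling of x should scale the density by roughly
+    // 2^(-5/2) ~= 1/5.657 (compare, e.g., the 2^11 and 2^12 entries above).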
BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 31)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40006005594525679129869441468227938628239752074460e-24), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.47497989906821913751146663525981445654490373951224e-25), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 34)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.73431218458810807033921781897016124929992555367139e-27), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.41697255768378077454412606000376285104775580444420e-28), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 38)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.55303924276181374957661287953452523234530316565188e-30), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.36032476336306675100543445227730858361169873925804e-31), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 42)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.37601488550958357902586661939110889053699438042016e-33), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.30500465172174486774768816059192932509423715417166e-34), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 46)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.20313953663045271143891020576861031160487434865093e-36), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25098110519701647231401040429952006498171313622520e-37), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 50)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.03431595374067647597712273413788666502302011992057e-39), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19822373554396139874268836303925702273981755123190e-40), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 54)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.86944917357487937107083766130282172728072819621788e-42), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.14670286674214980345963428973545770896346187182759e-43), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 58)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.70844645856921813581135618689921390269372237700480e-45), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.09638951830288066744104877057303202280205793138993e-46), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 62)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.55121724469650208575327739326222025805731763269349e-48), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04725538896765690179789918481715848637732139291593e-49), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 68)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.99927284078872744316201092259372825667966088624440e-52), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95241488358274164371290129159422806319923560612665e-55), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 76)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90665515974877113643838016757246988460215123083510e-58), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 80)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 1.86196792944215931292810563239498984002947733280447e-61), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 84)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.81832805609585870403135315663573226135611325562940e-64), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77571099228111201565561831702708228641500206234884e-67), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 92)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73409276589952345278868976272176004532614982149466e-70), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.69344996669875337186395484640796879426380291665101e-73), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 100)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65375973310425133971089340469528202564824480281467e-76), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61499973935962044893641934052273635317211406169375e-79), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 108)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.57714818296837934466447201222923471989464263831856e-82), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.54018377243005795377389844944261203114711195148214e-85), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 116)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.50408571526372847048232270453380081166710151511926e-88), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.46883370631223483445539326614628985514365382335866e-91), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 124)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43440791632054183052284498647098618666372443687369e-94), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40078898078177913136996580710057244791379339538446e-97), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40078898078177913136996580710057244791379339538446e-97), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 136)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.33589647367647088181492405614907498160724010027357e-103), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 144)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.27401015632292831593983083357722757492755899455411e-109), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 152)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21499076492588836282713969571802861683612727599536e-115), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 160)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.15870548718060337336267442294886457141506888961350e-121), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 168)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10502766340313279472606127066503960744387520753240e-127), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 176)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05383650150597838852506758753303490394961853745690e-133), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 184)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.00501680517766798832422980073264589686357358689966e-139), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 192)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
9.58458714654605854343633461697240731109212481403024e-146), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 200)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.14057459501844267219193898865929347142422181513809e-152), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 208)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.71713122846454875201410197130135867254659825815019e-158), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 216)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.31330416532950282288942525034080378775272203269023e-164), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 224)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.92818466694784433640425229105072382712623790043853e-170), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 232)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.56090609259399827614236096482345946037887373012402e-176), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 240)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.21064194926643207182155701143594690359008191120531e-182), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 248)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.87660403181689459974437428611368837698944274063617e-188), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.55804064923943958258092335330361211489624284804933e-194), tolerance);
+}
+
+template <typename RealType, int N>
+void do_test_holtsmark_cdf() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy [%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.05754562114095147060025732340404110273918791128410e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.43368449353064312968788810247763526151579823896375e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.89152652981222928500336992235218785823347685619486e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.45576815036790322986175114501424442292398775002753e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.15978035591850473786135203626730317231876547935253e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.04840867577215219812315602202859291564396444891271e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.18107352643844092577238041212187564083206726822056e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.63452659158632535349648745626466375370661702427253e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05039829654829170780787685299557006719608839147146e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16290125976286647650587923507379034571826555851855e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.29006094814340314434551699445775276932192677014796e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist,
static_cast(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43337488600334723245402521343673257218591019586678e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.59428804524998597404889957316726146572475641692056e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77411943695232738645173996334900914289694617802622e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.97397674713122675501528813572074060920189377220435e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19466281873758062089629668872911218009781881016957e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.43657975600729535499895880792984203156689462603233e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56549331000915958092357114949075709547374035120716e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.69963821328456787766164921360924665804748003656962e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83891389704828738280543932012082190429368746897774e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98318069008956501931929973721256792184639224594009e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.13225883262651243874304479279543149837428487068880e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.28592804901375835801811223649103787689862120396348e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.44392772657854742558827130471487286937515502098876e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.60595773518728397925852903878144782668636743128847e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.77167990746831553875195022302050315427406387091454e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.94072018335986771952143399033763464047462959349960e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.11267140533666880513239944176560287064005137544504e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.28709673289702231328042170648699212093305785288837e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.46353362731996489993314637378816744341919505345430e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.64149834100915287958166424201293669944118353975980e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.82049083059559169016945833205795772857456974521089e-1), tolerance); + BOOST_CHECK_EQUAL(cdf(dist, static_cast(0)), static_cast(0.5)); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.17950916940440830983054166794204227142543025478911e-1), tolerance); + 
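+    // The Holtsmark distribution is symmetric about zero, so these values
+    // satisfy cdf(-x) + cdf(x) = 1; e.g. the entries for -0.0625 and 0.0625
+    // above sum to one to the full precision quoted.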
BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.35850165899084712041833575798706330055881646024020e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.53646637268003510006685362621183255658080494654570e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.71290326710297768671957829351300787906694214711163e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.88732859466333119486760055823439712935994862455496e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.05927981664013228047856600966236535952537040650040e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.22832009253168446124804977697949684572593612908546e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.39404226481271602074147096121855217331363256871153e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.55607227342145257441172869528512713062484497901124e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.71407195098624164198188776350896212310137879603652e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.86774116737348756125695520720456850162571512931120e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.01681930991043498068070026278743207815360775405991e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.16108610295171261719456067987917809570631253102226e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.30036178671543212233835078639075334195251996343038e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.43450668999084041907642885050924290452625964879284e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.56342024399270464500104119207015796843310537396767e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.80533718126241937910370331127088781990218118983044e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.02602325286877324498471186427925939079810622779565e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.22588056304767261354826003665099085710305382197378e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.40571195475001402595110042683273853427524358307944e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.56662511399665276754597478656326742781408980413322e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.70993905185659685565448300554224723067807322985204e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.83709874023713352349412076492620965428173444148145e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(dist, static_cast(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
8.94960170345170829219212314700442993280391160852854e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.13654734084136746465035125437353362462933829757275e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.28189264735615590742276195878781243591679327317794e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.39515913242278478018768439779714070843560355510873e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.48402196440814952621386479637326968276812345206475e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.55442318496320967701382488549857555770760122499725e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.61084734701877707149966300776478121417665231438051e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.65663155064693568703121118975223647384842017610363e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.69424543788590485293997426765959588972608120887159e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.75181239051510561995724918481437181225561682663730e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.79330912859883809209439632732487322514831404016950e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.82433333964997258238374422700352745232108521574338e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.84823776850456273960749947374965581355459282720918e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.86712078422837181984231917376692653955733800370589e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.88234978933381804514525314568892574021644736001062e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.89484817977790488434819448808352507951600536883956e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.90525915297517643347206332876433307912297417265919e-1), tolerance);
+}
+
+template <typename RealType, int N>
+void do_test_holtsmark_ccdf() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy [%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94960170345170829219212314700442993280391160852854e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.83709874023713352349412076492620965428173444148145e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.70993905185659685565448300554224723067807322985204e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N,
8.56662511399665276754597478656326742781408980413322e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.40571195475001402595110042683273853427524358307944e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.22588056304767261354826003665099085710305382197378e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.02602325286877324498471186427925939079810622779565e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.80533718126241937910370331127088781990218118983044e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.56342024399270464500104119207015796843310537396767e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.43450668999084041907642885050924290452625964879284e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.30036178671543212233835078639075334195251996343038e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.16108610295171261719456067987917809570631253102226e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.01681930991043498068070026278743207815360775405991e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.86774116737348756125695520720456850162571512931120e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.71407195098624164198188776350896212310137879603652e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.55607227342145257441172869528512713062484497901124e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.39404226481271602074147096121855217331363256871153e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.22832009253168446124804977697949684572593612908546e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.05927981664013228047856600966236535952537040650040e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.88732859466333119486760055823439712935994862455496e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.71290326710297768671957829351300787906694214711163e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.53646637268003510006685362621183255658080494654570e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.35850165899084712041833575798706330055881646024020e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(-0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.17950916940440830983054166794204227142543025478911e-1), tolerance); + BOOST_CHECK_EQUAL(cdf(complement(dist, static_cast(0))), 
static_cast(0.5)); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.82049083059559169016945833205795772857456974521089e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.64149834100915287958166424201293669944118353975980e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.46353362731996489993314637378816744341919505345430e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.28709673289702231328042170648699212093305785288837e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.11267140533666880513239944176560287064005137544504e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.94072018335986771952143399033763464047462959349960e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.77167990746831553875195022302050315427406387091454e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.60595773518728397925852903878144782668636743128847e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.44392772657854742558827130471487286937515502098876e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.28592804901375835801811223649103787689862120396348e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.13225883262651243874304479279543149837428487068880e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98318069008956501931929973721256792184639224594009e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83891389704828738280543932012082190429368746897774e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.69963821328456787766164921360924665804748003656962e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56549331000915958092357114949075709547374035120716e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.43657975600729535499895880792984203156689462603233e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19466281873758062089629668872911218009781881016957e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.97397674713122675501528813572074060920189377220435e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77411943695232738645173996334900914289694617802622e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.59428804524998597404889957316726146572475641692056e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 
1.43337488600334723245402521343673257218591019586678e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.29006094814340314434551699445775276932192677014796e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16290125976286647650587923507379034571826555851855e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05039829654829170780787685299557006719608839147146e-1), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(2.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.63452659158632535349648745626466375370661702427253e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(2.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.18107352643844092577238041212187564083206726822056e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(2.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.04840867577215219812315602202859291564396444891271e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.15978035591850473786135203626730317231876547935253e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(3.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.45576815036790322986175114501424442292398775002753e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(3.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.89152652981222928500336992235218785823347685619486e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(3.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.43368449353064312968788810247763526151579823896375e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.05754562114095147060025732340404110273918791128410e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(4.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.48187609484894380042750815185628187744383173362701e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.06690871401161907905603672675126774851685959830498e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(5.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.75666660350027417616255772996472547678914784256620e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51762231495437260392500526250344186445407172790817e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(6.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.32879215771628180157680826233073460442661996294112e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.17650210666181954854746854311074259783552639989383e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(7.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05151820222095115651805511916474920483994631160437e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.47408470248235665279366712356669208770258273408139e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(9))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.84633233633455623701916449563937321750458546930453e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 
6.63980919776847052871529866313101745651230650390958e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.71560271091813267790502831701953317735310509411390e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.98877952779840995427042841099634486251917561421235e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.40474538743684446341559852404247422921805516802754e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.92694975034713587589009244180464471384939103569360e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.53008216069181937774819568728358865566934398876406e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.19610991747326725339429696634365931201323237447865e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.66754697245984312281273324441306444625571550964874e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.27055303995134836245432119316955904009319977641407e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.96335385591609284864855802297989450933958012331026e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.71983068404058084817697571696065106175519154567134e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52289416073896095009461377823763903270627907120760e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36093802515022944719982175339249822123873546752029e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22583027731897389283291308558313649709444449306352e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.11172037056341396583040940446061500559620114648017e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.30343276748924876383449972567328892540120922780153e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.93479451609843506274951848311637162210971363421001e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.87195324030611990483144403072151789810819131065125e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.02706331251660616582062908165140543120249377843861e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.34227281260431186934023112980425214646324423530835e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.77808682390572890160969127059116197868923042197157e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 
4.30671561098656496307105837586195479010992455366704e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.90809577465797739887717333706777298095814323854594e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.27353325758395877256315285908308787899438871193344e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79392283157821028307612118603031347679870090601645e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42100764912263851750335808792156725434230560510519e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.12427366298380215713674931256380175628765437052256e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.88357904993849478063900853297202135211787374336745e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.68514784935653217641685603828708555083243580835252e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51927236784831507287987712585213807190192778503378e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.37893523202524132194116984313653211252949573780743e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.87179879353850251536193247663317269917137569329013e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(512))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.72200715673082274403568012456413425331659480927047e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast(1024))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.08767256634253255597053159005214917567906016620942e-6), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15224951834965340756402503731619739933629215689572e-6), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.60926645137475638433041259135060814007792759468100e-7), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.69027136710097153757239920401079588116557633592788e-7), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.51153240121827639330112195094153363441740650435701e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.36283287602527689849064009106938913094716520594050e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.18894075854698171408179022464997418218329601220535e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 17))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.20354010552543796231728241342645692784842686349965e-9), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48617582449547816069323420820736862370145659607645e-9), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, 
ldexp(static_cast(1), 19))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.25442497729649541267939242007563203416972765165476e-10), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85771976129306035185873711236944017216967773980517e-10), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 21))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.56803119746276025902637885330726865801883592203156e-11), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.32214969859659307190062902673645385449168267230206e-11), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 23))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.21003899305378486738410997502574389330326609057126e-12), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.90268712277390815798081242324390081166198190434103e-12), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.02625487407274396069298698869802596448489279633645e-12), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.62835890339366126281650716118851052269284048205938e-13), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 27))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28281859258171445903392888079435183340041265714998e-13), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.53544862923055721373031196622890601888700626111182e-14), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 29))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.60352324072570315319362346398633880353985161880622e-14), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.66931078653639661641440656550426788294070266782671e-15), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 31))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.00440405090690395389846895694672344247801107283103e-15), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.08663848317021453602605776312368607691806298813143e-16), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 34))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.85829810396272422714320494875293186097734524880995e-17), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10728726299533984178525425523566315206520781873938e-17), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 38))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38410907874417469494912307476988489629927133322828e-18), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73013634843021835192352185216943643167993641440545e-19), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 42))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.16267043553777293728520200407227686276581114275508e-20), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.70333804442221617119725245647479628568092558840977e-21), tolerance); + 
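+    // The Holtsmark law is the symmetric alpha-stable distribution with alpha = 3/2,
+    // so the complementary CDF decays asymptotically like x^(-3/2): each quadrupling
+    // of x divides the tail probability by roughly 8, which the spot values in this
+    // ldexp-based sweep of the extreme upper tail follow.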
BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 46))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.37917255552777021393262025049731570198920941773050e-22), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22396569440971276740578385685661655637545268535833e-23), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 50))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.27995711801214095925566865602936005935821644314758e-24), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.59994639751517619906934188799897966230541134026964e-25), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 54))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.24993299689397024883663924561783076352358304956329e-26), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03124162461174628110457931016502737959110130110786e-26), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 58))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28905203076468285138072404465359649544991622324619e-27), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61131503845585356422590504127751316165005771606733e-28), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 62))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.01414379806981695528238129932509731805283190086691e-29), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51767974758727119410297662380140381412701796292469e-30), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 68))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.93387460560511124078590097461169759773383672872924e-32), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.14667907125798631372797027282887330061342582520339e-34), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 76))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.60418604884060361519995355129506804305208883665969e-36), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.50065407013134431487499274238985426822797191535889e-37), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 84))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.34477198458022549199217615998414729133523646527342e-39), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.66370622590660233123777524997523014264365635070860e-41), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 92))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.72454097797906614255902382808629709787906142136400e-43), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94459527809229084774847473138483921543599314796577e-45), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 100))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.39759301220192044496069917677888112741187383092503e-46), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18373908156550069525109246371700176158105285841693e-48), 
tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 108))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.41209231494609483632983197455781525247039509121778e-50), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.33139424210327318176536246024658633198499233002634e-52), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 116))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.33030350328636434650837884413529114372655051566613e-54), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.30160992238849442914193419439613924120727351807283e-55), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 124))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.03376550373202254553427217874396756438636487198880e-57), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.17775859958128522739730027928744931935369511248250e-59), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 136))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.75819970600899713720044013497912431482835720820923e-63), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 144))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.89409172509985281669932620482888777217489189653546e-66), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 152))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.62424737573206254076983936725802678753635716927602e-70), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 160))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12896664446583558124263656427197919617586844953028e-73), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 168))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.75626622184041889951815567449213670941374133186103e-77), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 176))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.72916558066508520390174725217806813821714192348884e-81), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 184))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.64286269059206181735882501273878616655691941491427e-84), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 192))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.01089524070327592128619387875680216444560404031804e-88), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 200))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.79222470874823222970262177430859903429102548905771e-92), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 208))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.39067986053423638420474164411831031110620739478948e-95), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 216))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.83662075325741304737485752958571853297413914743525e-99), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 224))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.42495623858823560726925232655901331371438944029181e-102), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 232))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.47889706686580958805969806288821609793552109446243e-106), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 240))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.49340104215285543959887222384818383285039329702741e-110), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 248))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.07358423880684947255831841402543550606699055103208e-113), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.06246152052453484511308206549178590348386364998067e-117), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 600))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.35984768491636685338723260976696756010081951066709e-272), tolerance);
+
+    // The test stops here in fp64, because x is infinite beyond this point.
+    if (N <= 53) {
+        return;
+    }
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 10000))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.07856314267954244808378917796336533616174400834444e-4517), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_holtsmark_quantile_nearzero() {
+    //
+    // Basic sanity checks, tolerance is 4 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 4;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.95154561868642736172727541497862573947423988108440e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.70076903638473389271107254016757513871156973570753e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.14334937403136202998120100318789158621074793095204e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.78777320599109327486003359289950832456660816353247e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.52359744792606448551215814625237535910408488746319e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.31030403921397562363480619469818801698833367088973e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.12887694529692805365864437467349833307816315605127e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.68933181713583005208786313049614919507807226788256e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.24180933056718074475811972779037648260058221511975e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.90498457639035315478919560434827057700411821656397e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.65011034985136468564856594364133703131867596374603e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.45602574935161564367593993713685007358536186330925e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N,
-3.30636223130194458226267853223267146033119688836326e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.18783126811804488668597304309230757418655445388316e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.08910089454817618171205461377219663239656206354005e-1), tolerance); + BOOST_CHECK_EQUAL(quantile(dist, static_cast(0.5)), static_cast(0)); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.50390625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.35942281074760935023421465350943213907657016740203e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.71903117534558589826852519194663375075753566669865e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.51171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.07901080568320221484687881885423066805647774437405e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.43954773006081419776089446583406196027634755818438e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.51953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.80082845222988104915780076816665811376325257794804e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.16304011143799846843111212111260495740955724460844e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.52734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.52637064382003902226333974540096053392566128735022e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.53125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08910089454817618171205461377219663239656206354005e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.53515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22571450376289000001278843371459783620761964026442e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5390625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36249702341028008589045826702986410397012912904709e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.54296875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.49946773116932301309732751744953722231029126346486e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.63664606836100199300226165836560082654887238338208e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.55078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77405165765079089783419383641965337744707947259452e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.91170432114732759551367174850419509782585472590891e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.55859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04962409893975946705863512977926741730110899605668e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18783126811804488668597304309230757418655445388316e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.56640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.32634636232249536218129854873203382200612639970098e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46519019187105731139818680492599864661432850376167e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.57421875)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 2.60438386451527503677057942069112096470660708202704e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.74394880687856404334281468719091417527159191461011e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.58203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.88390678663337502806483517706706303095739191330472e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.02427993547706435128366410334899635275725704584021e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.58984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.16509077296982960387421259319514641259915543447926e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.59375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.30636223130194458226267853223267146033119688836326e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.59765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.44811768106176498805637646337817923388445968601566e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.59038095808060603000739360761949422570819209911788e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.60546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.73317639143565086527888233910690716885943557654200e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.87652883269757340156275635385373789172745768932299e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.61328125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.02046368651559365036792703758158149784555601527522e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.16500694263927667044145946879116606537710528519644e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.62109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.31018520948359067342384024222999190532594772267877e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.45602574935161564367593993713685007358536186330925e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.62890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.60255651543790720669175415204007861250418903243190e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6328125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.74980619074494545171846930824082767773201407564280e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.63671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.89780422905541754178666731200120149076546958331875e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.04658089811438846102378009478135229217539543328542e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.64453125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.19616732518780928821098759267869073306168616762584e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6484375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.34659554517741239172052324381351337830662490680744e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.65234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.49789855148697728320168560667825948269915149112198e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.65625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
5.65011034985136468564856594364133703131867596374603e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.66015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.80326601535777269419392897717286438879602729753825e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.95740175290855108336930680485963056005472680796594e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.66796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.11255496139682430686772264889247897534154814090188e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.26876430189035334589546274389116075758343949758676e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.67578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.42606977014577414303134971466049624688623181490963e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.58451277380488310882288163359470075488686595989134e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.68359375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.74413621465733462219129425896964561569492923310844e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.90498457639035315478919560434827057700411821656397e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.69140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.06710401828627699619029762829527775785025847826609e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.23054247537343446268743874344186034038860806575417e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.69921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.39534976558556849766533672421653104101284193327489e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.56157770454041251979362066603791706893576777672076e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.70703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.72928022860981170501224651695596294779402544635284e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.89851352702281826568041708053406482267060399077072e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.71484375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.06933618382042868991732981731903327683778049211437e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.71875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.24180933056718074475811972779037648260058221511975e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.72265625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.41599681082196095188647911444502703373377770261645e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7265625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.59196535747955714353182241432999465132648793939716e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.73046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.76978478421742214185140384463517803619244579498785e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94952819242075830879661206434669971092120406861116e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.73828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
9.13127219511566873406119301005062170096048576202346e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7421875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.31509715961740080067603245114697688184732500932588e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.74609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.50108747080172235764142816696014524944397797022118e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.68933181713583005208786313049614919507807226788256e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.75390625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.87992350186512083533727177758662092012850600324604e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.00729607820486228792863859054788768644744365249151e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.76171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.02685472384747190194216544893563951741322238866541e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04667921798768908218602656800798525125137666299761e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.76953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06678110853147091220341113291781102477507787382916e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08717260890978562986849760571609969494354300822263e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.77734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10786665132220733188524032283620543957825067010411e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.78125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12887694529692805365864437467349833307816315605127e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.78515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.15021804221161134460394215947401773984635551400974e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.17190540651154074066537741066806130017312231661434e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.79296875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.19395549446874295273954813640902172894636549828066e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21638584145104788446634880619086527181141939030279e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.80078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.23921515881685008719278461299232510042094064507963e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.26246344172387198181843263481470665031951188807903e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.80859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28615208934364044613153187689263023500565577152964e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31030403921397562363480619469818801698833367088973e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.81640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.33494391774730388327762461486718746182135887404098e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36009820925261741664314298719135880075160309673291e0), 
tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.82421875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38579544623528308147561770378298998291069435447743e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41206642422644329402039008236727065491131379281346e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.83203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43894444498096320102061428365633543440558564852878e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8359375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.46646559259303986239248263261753112540493595538180e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.83984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.49466904794112146248736006028509653369456653722975e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.84375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52359744792606448551215814625237535910408488746319e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.84765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55329729725622722446696215102952397443200898053734e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.58381944212178952767489242020964875845531372433107e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.85546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61521961706725528436767664289542667668084392814972e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.64755907881888895242774154629775987069182281943649e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.86328125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.68090534388763421593277486421165006597881155937148e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.71533305062559095290467926282068904086311880655794e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.87109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.75092497130127405520149304919663885139873951807268e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.78777320599109327486003359289950832456660816353247e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.87890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.82598059808698758538443295349421685195378444570311e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.86566242157090907181680189290396765394233286379846e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.88671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90694840369852884963353039612138690706293500611801e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.94998516446439884497123835171451955479771823102329e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.89453125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.99493917772297054805704471816059443529057908424094e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04200039028200656353168185848438653746136511354273e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.90234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.09138667776446768039690693058064390661207192288310e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, 
static_cast(0.90625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.14334937403136202998120100318789158621074793095204e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.91015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19818019103302928413100774795456740418726081571239e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25621995785515543565780905111478616154698242356459e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.91796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.31786976613802108820384089261971867032207069978249e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.38360533647314959687029290315016585988822268727435e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.92578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.45399575188877505107943151191453137828469465461499e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9296875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.52972819582716132188558325053476431645151134471514e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.93359375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.61164107343000684767083444817632028828927968917161e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.70076903638473389271107254016757513871156973570753e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.94140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79840522818853957979403024307775647164832128717134e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9453125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.90618896524175502682103322803168945370341377670787e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.94921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.02623187745561328365781914812219941666139382669240e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.16130376436882129187002225210296787778679380216571e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.95703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.31511403805596290718635028471440503099806209998330e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.49275168669289561003117954123304779848451307206373e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.96484375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.70139933971958385705119882168933347231988022896107e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.96875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.95154561868642736172727541497862573947423988108440e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.97265625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.25916037511354144395537844168672288266598351274466e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.64987772024289138285743520182808805983174934278635e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.98046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.16779337095862086311250586064141028622224308218889e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.89632696959562976212621758577393939287586784169403e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.98828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
7.01689760146742972661016754806402368873114241071209e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.9921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.02446937207505244639190533898754565373763855739835e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.99609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40479269149807902190125365288201718857308359786880e1), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_holtsmark_quantile_lower() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -3)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.78777320599109327486003359289950832456660816353247e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -4)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.70076903638473389271107254016757513871156973570753e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -5)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.95154561868642736172727541497862573947423988108440e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -6)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.89632696959562976212621758577393939287586784169403e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -7)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.02446937207505244639190533898754565373763855739835e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -8)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.40479269149807902190125365288201718857308359786880e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -10)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.48634712845212854436829850825868101882854019691874e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -12)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.75100888665534247471443047755794084714498529274899e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -14)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.20296704598194178128278789813687898595549252840676e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -16)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.54977454632239578277064855837334284482347724412824e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -20)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.52361836255645260708087122768062244544827526253646e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -24)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.23734752970824778029419146983721335265273533376700e4), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -28)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.42062670553125367339285994800008811139726501576815e5), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -32)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.02041713923935805934169232925253585383521252378005e5), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -40)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.63680429392084153704899843149763443252941653727485e7), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -48)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.46626761095577290791010108814328116732395911944180e9), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -56)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.91162056903107188819131273099660458893079363542047e10), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -64)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.38341606205240162091324729036756554251581053237655e12), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -80)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.87423965612020298589373786039770640757408383939246e15), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -96)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.29757143623897661671305188560924841842505858740001e18), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -112)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.02366940392761715971201478542352832996294475084257e22), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -128)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.66397326199025580692950800332626492430331068544172e25), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -160)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.39662661178492965217151228166250717006976010266469e31), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -192)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.16169688570203891305395889425358741905195655516409e38), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -224)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.06948889094299024072567114732250581789838857797378e44), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -256)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.11034458952573565292573972403491751744218292992004e50), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -320)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.66220760083199094068657639449976729910923748932480e63), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -384)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.95304965911372717286692649124348058110571556299024e76), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -448)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.75980725346824443030445016705237404631255977139835e89), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -512)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.92674940440883821350824836233864795634501852860731e102), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -640)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.39113744455748572074441450789762236967081046313794e127), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -768)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.57731878887639063343639480262468437174058078877170e153), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -896)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.23102338973254024994084948896750440849135610207358e179), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_holtsmark_quantile_upper() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.78777320599109327486003359289950832456660816353247e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.70076903638473389271107254016757513871156973570753e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.95154561868642736172727541497862573947423988108440e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.89632696959562976212621758577393939287586784169403e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.02446937207505244639190533898754565373763855739835e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40479269149807902190125365288201718857308359786880e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.48634712845212854436829850825868101882854019691874e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.75100888665534247471443047755794084714498529274899e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20296704598194178128278789813687898595549252840676e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.54977454632239578277064855837334284482347724412824e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.52361836255645260708087122768062244544827526253646e3), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_holtsmark_locscale_param() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    holtsmark_distribution<RealType> dist_0_1(static_cast<RealType>(0), static_cast<RealType>(1));
+    holtsmark_distribution<RealType> dist_1_3(static_cast<RealType>(1), static_cast<RealType>(3));
+
+    BOOST_CHECK_CLOSE(entropy(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.0694485051346244003155800384542166381), tolerance);
+    BOOST_CHECK_CLOSE(entropy(dist_1_3), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.0694485051346244003155800384542166381) + log(static_cast<RealType>(3)), tolerance);
+
+    BOOST_CHECK_EQUAL(median(dist_0_1), static_cast<RealType>(0));
+    BOOST_CHECK_EQUAL(median(dist_1_3), static_cast<RealType>(1));
+
+    BOOST_CHECK_EQUAL(mode(dist_0_1), static_cast<RealType>(0));
+    BOOST_CHECK_EQUAL(mode(dist_1_3), static_cast<RealType>(1));
+
+    BOOST_CHECK_EQUAL(mean(dist_0_1), static_cast<RealType>(0));
+    BOOST_CHECK_EQUAL(mean(dist_1_3), static_cast<RealType>(1));
+
+    BOOST_CHECK((boost::math::isinf)(variance(dist_0_1)));
+
+    BOOST_CHECK_CLOSE(pdf(dist_0_1, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.02038159607840130388931544845552929991729709746772e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist_1_3, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.87352751452164445024482162286994868261727837966217e-1) / 3, tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, static_cast<RealType>(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94960170345170829219212314700442993280391160852854e-1), tolerance);
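+    // dist_1_3 must reproduce the same probability at x = mu + c * 2 = 1 + 3 * 2 = 7,
+    // since the CDF depends only on the standardised argument (x - mu) / c.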
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, static_cast<RealType>(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94960170345170829219212314700442993280391160852854e-1), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+}
+
+BOOST_AUTO_TEST_CASE(holtsmark_pdf_fp64)
+{
+    do_test_holtsmark_pdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_pdf_std64)
+{
+    do_test_holtsmark_pdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_pdf_fp128)
+{
+    do_test_holtsmark_pdf<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(holtsmark_cdf_fp64)
+{
+    do_test_holtsmark_cdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_cdf_std64)
+{
+    do_test_holtsmark_cdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_cdf_fp128)
+{
+    do_test_holtsmark_cdf<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(holtsmark_ccdf_fp64)
+{
+    do_test_holtsmark_ccdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_ccdf_std64)
+{
+    do_test_holtsmark_ccdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_ccdf_fp128)
+{
+    do_test_holtsmark_ccdf<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_nearzero_fp64)
+{
+    do_test_holtsmark_quantile_nearzero<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_nearzero_std64)
+{
+    do_test_holtsmark_quantile_nearzero<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_nearzero_fp128)
+{
+    do_test_holtsmark_quantile_nearzero<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_lower_fp64)
+{
+    do_test_holtsmark_quantile_lower<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_lower_std64)
+{
+    do_test_holtsmark_quantile_lower<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_lower_fp128)
+{
+    do_test_holtsmark_quantile_lower<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_upper_fp64)
+{
+    do_test_holtsmark_quantile_upper<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_upper_std64)
+{
+    do_test_holtsmark_quantile_upper<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_quantile_upper_fp128)
+{
+    do_test_holtsmark_quantile_upper<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(holtsmark_locscale_fp64)
+{
+    do_test_holtsmark_locscale_param<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(holtsmark_locscale_std64)
+{
+    do_test_holtsmark_locscale_param<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(holtsmark_locscale_fp128)
+{
+    do_test_holtsmark_locscale_param<cpp_bin_float_quad, 113>();
+}
+#endif
diff --git a/test/test_holtsmark_cdf_double.cu b/test/test_holtsmark_cdf_double.cu
new file mode 100644
index 0000000000..6b1d57041c
--- /dev/null
+++ b/test/test_holtsmark_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
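+    // Integer ceiling division: round up so a partial final block covers the tail;
+    // the kernel's (i < numElements) bounds check discards the surplus threads.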
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::holtsmark_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_holtsmark_cdf_float.cu b/test/test_holtsmark_cdf_float.cu
new file mode 100644
index 0000000000..2a3533bac9
--- /dev/null
+++ b/test/test_holtsmark_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::holtsmark_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_holtsmark_cdf_nvrtc_double.cpp b/test/test_holtsmark_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..3b8c9ba946
--- /dev/null
+++ b/test/test_holtsmark_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+extern "C" __global__
+void test_holtsmark_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_holtsmark_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_holtsmark_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_holtsmark_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
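+        // The kernel ignores its second argument; h_in2/d_in2 are filled and copied
+        // anyway so the launch code stays identical across these NVRTC tests.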
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::holtsmark_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_holtsmark_cdf_nvrtc_float.cpp b/test/test_holtsmark_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..a3ffec5942
--- /dev/null
+++ b/test/test_holtsmark_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+extern "C" __global__
+void test_holtsmark_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_holtsmark_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_holtsmark_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_holtsmark_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::holtsmark_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_holtsmark_pdf_double.cu b/test/test_holtsmark_pdf_double.cu
new file mode 100644
index 0000000000..a53360d200
--- /dev/null
+++ b/test/test_holtsmark_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::holtsmark_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_holtsmark_pdf_float.cu b/test/test_holtsmark_pdf_float.cu
new file mode 100644
index 0000000000..57052803fc
--- /dev/null
+++ b/test/test_holtsmark_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::holtsmark_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_holtsmark_pdf_nvrtc_double.cpp b/test/test_holtsmark_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..a8355d368c
--- /dev/null
+++ b/test/test_holtsmark_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+extern "C" __global__
+void test_holtsmark_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_holtsmark_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_holtsmark_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_holtsmark_kernel"), "Failed to get kernel function");
+
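+        // From here the runtime-compiled PTX behaves like any precompiled kernel:
+        // it is launched through the driver API with an explicit argument array.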
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::holtsmark_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_holtsmark_pdf_nvrtc_float.cpp b/test/test_holtsmark_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..7a02d19832
--- /dev/null
+++ b/test/test_holtsmark_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+extern "C" __global__
+void test_holtsmark_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_holtsmark_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_holtsmark_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_holtsmark_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::holtsmark_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_holtsmark_quan_nvrtc_double.cpp b/test/test_holtsmark_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..1c2cc61fa3
--- /dev/null
+++ b/test/test_holtsmark_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+extern "C" __global__
+void test_holtsmark_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_holtsmark_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_holtsmark_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_holtsmark_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
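+        // quantile() consumes probabilities, so inputs are drawn uniformly from (0, 1);
+        // the unused second array again only satisfies the shared kernel signature.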
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::holtsmark_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_holtsmark_quan_nvrtc_float.cpp b/test/test_holtsmark_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..a343f232e0
--- /dev/null
+++ b/test/test_holtsmark_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/holtsmark.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/holtsmark.hpp>
+extern "C" __global__
+void test_holtsmark_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::holtsmark_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_holtsmark_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_holtsmark_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_holtsmark_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::holtsmark_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibeta.cpp b/test/test_ibeta.cpp
index e026ac6c52..987b361105 100644
--- a/test/test_ibeta.cpp
+++ b/test/test_ibeta.cpp
@@ -3,7 +3,18 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Woverflow"
+#endif
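+// The ibeta test data headers contain literals that are out of range for float,
+// hence the blanket warning suppression above.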
+
 #include "test_ibeta.hpp"
 
 #if !defined(TEST_FLOAT) && !defined(TEST_DOUBLE) && !defined(TEST_LDOUBLE) && !defined(TEST_REAL_CONCEPT)
diff --git a/test/test_ibeta.hpp b/test/test_ibeta.hpp
index 7c951d614f..cfd5d78cd1 100644
--- a/test/test_ibeta.hpp
+++ b/test/test_ibeta.hpp
@@ -8,9 +8,10 @@
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/beta.hpp>
 #include <boost/math/constants/constants.hpp>
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/tools/stats.hpp>
 #include <boost/math/tools/big_constant.hpp>
 #include <boost/array.hpp>
diff --git a/test/test_ibeta_derivative.cpp b/test/test_ibeta_derivative.cpp
index c899c94bf5..5d6a312754 100644
--- a/test/test_ibeta_derivative.cpp
+++ b/test/test_ibeta_derivative.cpp
@@ -4,7 +4,7 @@
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 #if defined(__GNUC__) && __GNUC__ <= 12
 #pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wliteral-range"
+#pragma GCC diagnostic ignored "-Woverflow"
 #endif
 #include <pch_light.hpp>
 #include "test_ibeta_derivative.hpp"
diff --git a/test/test_ibeta_derivative_double.cu b/test/test_ibeta_derivative_double.cu
new file mode 100644
index 0000000000..e5f7f340ba
--- /dev/null
+++ b/test/test_ibeta_derivative_double.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_derivative(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+    // Consolidate the test data:
+    std::vector<float_type> v1, v2, v3;
+
+    for(unsigned i = 0; i < ibeta_data.size(); ++i)
+    {
+        v1.push_back(ibeta_data[i][0]);
+        v2.push_back(ibeta_data[i][1]);
+        v3.push_back(ibeta_data[i][2]);
+    }
+    for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+    {
+        v1.push_back(ibeta_small_data[i][0]);
+        v2.push_back(ibeta_small_data[i][1]);
+        v3.push_back(ibeta_small_data[i][2]);
+    }
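+    // The .ipp tables only hold a few thousand rows; the initialization loop below
+    // cycles through them (i % v1.size()) to fill all 50000 test inputs.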
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+    cuda_managed_ptr<float_type> input_vector3(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        int table_id = i % v1.size();
+        input_vector1[i] = v1[table_id];
+        input_vector2[i] = v2[table_id];
+        input_vector3[i] = v3[table_id];
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ibeta_derivative(input_vector1[i], input_vector2[i], input_vector3[i]));
+    double t = w.elapsed();
+    bool failed = false;
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::isfinite(output_vector[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                failed = true;
+            }
+        }
+    }
+
+    if (failed)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_ibeta_derivative_float.cu b/test/test_ibeta_derivative_float.cu
new file mode 100644
index 0000000000..36a79665d4
--- /dev/null
+++ b/test/test_ibeta_derivative_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
diff --git a/test/test_ibeta_derivative_float.cu b/test/test_ibeta_derivative_float.cu
new file mode 100644
index 0000000000..36a79665d4
--- /dev/null
+++ b/test/test_ibeta_derivative_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_derivative(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+    // Consolidate the test data:
+    std::vector<float_type> v1, v2, v3;
+
+    for(unsigned i = 0; i < ibeta_data.size(); ++i)
+    {
+        v1.push_back(ibeta_data[i][0]);
+        v2.push_back(ibeta_data[i][1]);
+        v3.push_back(ibeta_data[i][2]);
+    }
+    for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+    {
+        v1.push_back(ibeta_small_data[i][0]);
+        v2.push_back(ibeta_small_data[i][1]);
+        v3.push_back(ibeta_small_data[i][2]);
+    }
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+    cuda_managed_ptr<float_type> input_vector3(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        int table_id = i % v1.size();
+        input_vector1[i] = v1[table_id];
+        input_vector2[i] = v2[table_id];
+        input_vector3[i] = v3[table_id];
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ibeta_derivative(input_vector1[i], input_vector2[i], input_vector3[i]));
+    double t = w.elapsed();
+    bool failed = false;
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::isfinite(output_vector[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                failed = true;
+            }
+        }
+    }
+
+    if (failed)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
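Likewise, the watch timer is only ever used here through elapsed() and reset(). A minimal std::chrono-based sketch consistent with that usage (again an assumption, not the repository's actual stopwatch.hpp):

// Hypothetical sketch of the `watch` stopwatch -- inferred from its usage
// (w.elapsed(), w.reset()), not the repository's actual stopwatch.hpp.
#include <chrono>

class watch
{
    std::chrono::steady_clock::time_point start_ { std::chrono::steady_clock::now() };
public:
    // Seconds since construction or the last reset().
    double elapsed() const
    {
        return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
    }
    void reset() { start_ = std::chrono::steady_clock::now(); }
};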
diff --git a/test/test_ibeta_derivative_nvrtc_double.cpp b/test/test_ibeta_derivative_nvrtc_double.cpp
new file mode 100644
index 0000000000..f15d21db00
--- /dev/null
+++ b/test/test_ibeta_derivative_nvrtc_double.cpp
@@ -0,0 +1,207 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibeta_derivative_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_derivative(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_derivative_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibeta_derivative_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_derivative_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ibeta_derivative(h_in1[i], h_in2[i], h_in3[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
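One cleanup detail in the NVRTC hosts above and below: the program, context and heap allocations are all released, but the CUmodule loaded with cuModuleLoadDataEx is never passed to cuModuleUnload. An illustrative RAII guard that would close that gap (a sketch, not part of the patch):

// Illustrative RAII guard for a CUmodule -- not part of the patch above.
#include <cuda.h>

struct module_guard
{
    CUmodule mod {nullptr};
    explicit module_guard(CUmodule m) : mod(m) {}
    // Unload the module when the guard leaves scope.
    ~module_guard() { if (mod) cuModuleUnload(mod); }
    module_guard(const module_guard&) = delete;
    module_guard& operator=(const module_guard&) = delete;
};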
diff --git a/test/test_ibeta_derivative_nvrtc_float.cpp b/test/test_ibeta_derivative_nvrtc_float.cpp
new file mode 100644
index 0000000000..17443e0bdc
--- /dev/null
+++ b/test/test_ibeta_derivative_nvrtc_float.cpp
@@ -0,0 +1,207 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibeta_derivative_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_derivative(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_derivative_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibeta_derivative_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_derivative_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ibeta_derivative(h_in1[i], h_in2[i], h_in3[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibeta_double.cu b/test/test_ibeta_double.cu
new file mode 100644
index 0000000000..20384bf25f
--- /dev/null
+++ b/test/test_ibeta_double.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// floating-point value does not fit in required floating-point type +#pragma nv_diag_suppress 221 + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ibeta(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ibeta_data.size(); ++i) + { + v1.push_back(ibeta_data[i][0]); + v2.push_back(ibeta_data[i][1]); + v3.push_back(ibeta_data[i][2]); + } + for(unsigned i = 0; i < ibeta_small_data.size(); ++i) + { + v1.push_back(ibeta_small_data[i][0]); + v2.push_back(ibeta_small_data[i][1]); + v3.push_back(ibeta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ibeta(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::isfinite(output_vector[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_ibeta_float.cu b/test/test_ibeta_float.cu new file mode 100644 index 0000000000..be17813ee4 --- /dev/null +++ b/test/test_ibeta_float.cu @@ -0,0 +1,149 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// floating-point value does not fit in required floating-point type +#pragma nv_diag_suppress 221 + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ibeta(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ibeta_data.size(); ++i) + { + v1.push_back(ibeta_data[i][0]); + v2.push_back(ibeta_data[i][1]); + v3.push_back(ibeta_data[i][2]); + } + for(unsigned i = 0; i < ibeta_small_data.size(); ++i) + { + v1.push_back(ibeta_small_data[i][0]); + v2.push_back(ibeta_small_data[i][1]); + v3.push_back(ibeta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = 
v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ibeta(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::isfinite(output_vector[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_ibeta_inv.cpp b/test/test_ibeta_inv.cpp index 218c1625e8..ab1f4267fc 100644 --- a/test/test_ibeta_inv.cpp +++ b/test/test_ibeta_inv.cpp @@ -3,7 +3,18 @@ // Boost Software License, Version 1.0. 
 (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
 #include "test_ibeta_inv.hpp"
 
 #if !defined(TEST_FLOAT) && !defined(TEST_DOUBLE) && !defined(TEST_LDOUBLE) && !defined(TEST_REAL_CONCEPT)
diff --git a/test/test_ibeta_inv.hpp b/test/test_ibeta_inv.hpp
index ba98901773..fa765b2ef8 100644
--- a/test/test_ibeta_inv.hpp
+++ b/test/test_ibeta_inv.hpp
@@ -8,10 +8,11 @@
 #define BOOST_TEST_MAIN
 #include
 #include
+#include
 #include // for has_denorm_now
 #include
 #include
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include
 #include
 #include
@@ -306,6 +307,7 @@ void test_spots(T)
    BOOST_MATH_CHECK_THROW(::boost::math::ibeta_inv(static_cast<T>(2.125), -n, static_cast<T>(0.125)), std::domain_error);
    BOOST_MATH_CHECK_THROW(::boost::math::ibeta_inv(static_cast<T>(2.125), static_cast<T>(1.125), -n), std::domain_error);
    }
+   #ifndef SYCL_LANGUAGE_VERSION
    if (boost::math::detail::has_denorm_now<T>())
    {
       T m = std::numeric_limits<T>::denorm_min();
@@ -317,5 +319,6 @@ void test_spots(T)
       BOOST_CHECK((boost::math::isfinite)(boost::math::ibeta_inv(static_cast<T>(12.125), m, static_cast<T>(0.125))));
       BOOST_CHECK((boost::math::isfinite)(boost::math::ibeta_inv(m, m, static_cast<T>(0.125))));
    }
+   #endif
 }
diff --git a/test/test_ibeta_inv_ab.cpp b/test/test_ibeta_inv_ab.cpp
index c1acb2d1ca..fdf735ef1e 100644
--- a/test/test_ibeta_inv_ab.cpp
+++ b/test/test_ibeta_inv_ab.cpp
@@ -3,7 +3,18 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
 #include "test_ibeta_inv_ab.hpp"
 
 #if !defined(TEST_FLOAT) && !defined(TEST_DOUBLE) && !defined(TEST_LDOUBLE) && !defined(TEST_REAL_CONCEPT)
diff --git a/test/test_ibeta_inv_ab.hpp b/test/test_ibeta_inv_ab.hpp
index c378d15287..b91ab5261d 100644
--- a/test/test_ibeta_inv_ab.hpp
+++ b/test/test_ibeta_inv_ab.hpp
@@ -10,9 +10,10 @@
 #define BOOST_TEST_MAIN
 #include
 #include
+#include
 #include
 #include
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include
 #include
 #include
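All of the GPU tests in this patch accept a device result when it lies within 300 machine epsilons of the serially computed reference, as measured by boost::math::epsilon_difference. A minimal host-only illustration of that acceptance test (the perturbation applied to fake a "device" result is invented for the example):

// Minimal illustration of the 300-eps check used by the GPU tests in this
// patch; the perturbed "device" value below is invented for the example.
#include <iostream>
#include <boost/math/special_functions/beta.hpp>
#include <boost/math/special_functions/relative_difference.hpp>

int main()
{
    double serial   = boost::math::ibeta_inv(2.125, 1.125, 0.125);
    double parallel = serial * (1 + 1e-14); // stand-in for a device result

    // Distance between the two values in multiples of machine epsilon.
    double dist = boost::math::epsilon_difference(serial, parallel);
    std::cout << (dist > 300 ? "FAIL" : "PASS")
              << " (distance: " << dist << " eps)" << std::endl;
}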
diff --git a/test/test_ibeta_inv_double.cu b/test/test_ibeta_inv_double.cu
new file mode 100644
index 0000000000..ef62c5e162
--- /dev/null
+++ b/test/test_ibeta_inv_double.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_inv(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+    // Consolidate the test data:
+    std::vector<float_type> v1, v2, v3;
+
+    for(unsigned i = 0; i < ibeta_data.size(); ++i)
+    {
+        v1.push_back(ibeta_data[i][0]);
+        v2.push_back(ibeta_data[i][1]);
+        v3.push_back(ibeta_data[i][2]);
+    }
+    for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+    {
+        v1.push_back(ibeta_small_data[i][0]);
+        v2.push_back(ibeta_small_data[i][1]);
+        v3.push_back(ibeta_small_data[i][2]);
+    }
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+    cuda_managed_ptr<float_type> input_vector3(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        int table_id = i % v1.size();
+        input_vector1[i] = v1[table_id];
+        input_vector2[i] = v2[table_id];
+        input_vector3[i] = v3[table_id];
+    }
+
+    // Launch the CUDA test kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::ibeta_inv(input_vector1[i], input_vector2[i], input_vector3[i]));
+    double t = w.elapsed();
+    bool failed = false;
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::isfinite(output_vector[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+            {
+                std::cerr << "Result verification failed at element " << i << "!"
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_ibeta_inv_float.cu b/test/test_ibeta_inv_float.cu new file mode 100644 index 0000000000..a0d48bfbda --- /dev/null +++ b/test/test_ibeta_inv_float.cu @@ -0,0 +1,149 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// floating-point value does not fit in required floating-point type +#pragma nv_diag_suppress 221 + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ibeta_inv(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ibeta_data.size(); ++i) + { + v1.push_back(ibeta_data[i][0]); + v2.push_back(ibeta_data[i][1]); + v3.push_back(ibeta_data[i][2]); + } + for(unsigned i = 0; i < ibeta_small_data.size(); ++i) + { + v1.push_back(ibeta_small_data[i][0]); + v2.push_back(ibeta_small_data[i][1]); + v3.push_back(ibeta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + 
err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ibeta_inv(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::isfinite(output_vector[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_ibeta_inv_nvrtc_double.cpp b/test/test_ibeta_inv_nvrtc_double.cpp new file mode 100644 index 0000000000..2f01012bbe --- /dev/null +++ b/test/test_ibeta_inv_nvrtc_double.cpp @@ -0,0 +1,207 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_ibeta_inv_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta_inv(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, 
cuda_kernel, "test_ibeta_inv_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_inv_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_inv_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::ibeta_inv(h_in1[i], h_in2[i], 
h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibeta_inv_nvrtc_float.cpp b/test/test_ibeta_inv_nvrtc_float.cpp new file mode 100644 index 0000000000..5d804398cb --- /dev/null +++ b/test/test_ibeta_inv_nvrtc_float.cpp @@ -0,0 +1,207 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_ibeta_inv_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta_inv(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_inv_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_inv_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", 
"--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_inv_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::ibeta_inv(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + 
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibeta_inva_double.cu b/test/test_ibeta_inva_double.cu
new file mode 100644
index 0000000000..7783eb21bb
--- /dev/null
+++ b/test/test_ibeta_inva_double.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_inva(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel: one thread per element
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct by recomputing on the host
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibeta_inva(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // Check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibeta_inva_float.cu b/test/test_ibeta_inva_float.cu
new file mode 100644
index 0000000000..ff918f9436
--- /dev/null
+++ b/test/test_ibeta_inva_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_inva(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel: one thread per element
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct by recomputing on the host
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibeta_inva(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // Check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibeta_inva_nvrtc_double.cpp b/test/test_ibeta_inva_nvrtc_double.cpp
new file mode 100644
index 0000000000..a392eaea65
--- /dev/null
+++ b/test/test_ibeta_inva_nvrtc_double.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false +typedef double float_type; +#include +#include +extern "C" __global__ +void test_ibeta_inva_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta_inva(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_inva_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_inva_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_inva_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, 
*d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results against a serial host computation
+        for (int i = 0; i < numElements; ++i)
+        {
+            // Sometimes the ignore_error policy is itself ignored and the call below throws.
+            // Rather than terminating, skip this element and continue through the results array.
+            double res;
+            try
+            {
+                res = boost::math::ibeta_inva(h_in1[i], h_in2[i], h_in3[i]);
+            }
+            catch (...)
+            {
+                continue;
+            }
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibeta_inva_nvrtc_float.cpp b/test/test_ibeta_inva_nvrtc_float.cpp
new file mode 100644
index 0000000000..ba5745c321
--- /dev/null
+++ b/test/test_ibeta_inva_nvrtc_float.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false +typedef float float_type; +#include +#include +extern "C" __global__ +void test_ibeta_inva_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta_inva(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_inva_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_inva_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_inva_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, 
*d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + // Sometimes the ignore error policy is ignored so the below throws + // Rather than terminating we can continue to process through our results array + double res; + try + { + res = boost::math::ibeta_inva(h_in1[i], h_in2[i], h_in3[i]); + } + catch (...) + { + continue; + } + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibeta_invb_double.cu b/test/test_ibeta_invb_double.cu new file mode 100644 index 0000000000..562f5349dd --- /dev/null +++ b/test/test_ibeta_invb_double.cu @@ -0,0 +1,149 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_invb(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel: one thread per element
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct by recomputing on the host
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibeta_invb(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // Check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibeta_invb_float.cu b/test/test_ibeta_invb_float.cu
new file mode 100644
index 0000000000..86f5615c36
--- /dev/null
+++ b/test/test_ibeta_invb_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_invb(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel: one thread per element
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct by recomputing on the host
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibeta_invb(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // Check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibeta_invb_nvrtc_double.cpp b/test/test_ibeta_invb_nvrtc_double.cpp
new file mode 100644
index 0000000000..6f046f09f3
--- /dev/null
+++ b/test/test_ibeta_invb_nvrtc_double.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <cmath>
+#include <cstdlib>
+#include <vector>
+#include <exception>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+typedef double float_type;
+#include
+#include
+extern "C" __global__
+void test_ibeta_invb_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibeta_invb(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_invb_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibeta_invb_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_invb_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        //
Verify Result + for (int i = 0; i < numElements; ++i) + { + // Sometimes the ignore error policy is ignored so the below throws + // Rather than terminating we can continue to process through our results array + double res; + try + { + res = boost::math::ibeta_invb(h_in1[i], h_in2[i], h_in3[i]); + } + catch (...) + { + continue; + } + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibeta_invb_nvrtc_float.cpp b/test/test_ibeta_invb_nvrtc_float.cpp new file mode 100644 index 0000000000..f2d17b8447 --- /dev/null +++ b/test/test_ibeta_invb_nvrtc_float.cpp @@ -0,0 +1,220 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false +typedef float float_type; +#include +#include +extern "C" __global__ +void test_ibeta_invb_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta_invb(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_invb_kernel.cu", 0, 
nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_invb_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_invb_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + // Sometimes the ignore error policy is ignored so the below throws + // Rather than terminating we can 
continue to process through our results array + double res; + try + { + res = boost::math::ibeta_invb(h_in1[i], h_in2[i], h_in3[i]); + } + catch (...) + { + continue; + } + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibeta_nvrtc_double.cpp b/test/test_ibeta_nvrtc_double.cpp new file mode 100644 index 0000000000..bc920b6368 --- /dev/null +++ b/test/test_ibeta_nvrtc_double.cpp @@ -0,0 +1,207 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_ibeta_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", 
"--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::ibeta(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + 
+ cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibeta_nvrtc_float.cpp b/test/test_ibeta_nvrtc_float.cpp new file mode 100644 index 0000000000..ee15748628 --- /dev/null +++ b/test/test_ibeta_nvrtc_float.cpp @@ -0,0 +1,207 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_ibeta_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibeta(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibeta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibeta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, 
log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibeta_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::ibeta(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibetac_inv_double.cu b/test/test_ibetac_inv_double.cu new file mode 100644 index 0000000000..a983d16677 --- /dev/null +++ b/test/test_ibetac_inv_double.cu @@ -0,0 +1,149 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// floating-point value does not fit in required floating-point type +#pragma nv_diag_suppress 221 + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ibetac_inv(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ibeta_data.size(); ++i) + { + v1.push_back(ibeta_data[i][0]); + v2.push_back(ibeta_data[i][1]); + v3.push_back(ibeta_data[i][2]); + } + for(unsigned i = 0; i < ibeta_small_data.size(); ++i) + { + v1.push_back(ibeta_small_data[i][0]); + v2.push_back(ibeta_small_data[i][1]); + v3.push_back(ibeta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % v1.size(); + input_vector1[i] = v1[table_id]; + input_vector2[i] = v2[table_id]; + input_vector3[i] = v3[table_id]; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::ibetac_inv(input_vector1[i], input_vector2[i], input_vector3[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::isfinite(output_vector[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} + + diff --git a/test/test_ibetac_inv_float.cu b/test/test_ibetac_inv_float.cu new file mode 100644 index 0000000000..94583b45e2 --- /dev/null +++ b/test/test_ibetac_inv_float.cu @@ -0,0 +1,149 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// floating-point value does not fit in required floating-point type +#pragma nv_diag_suppress 221 + +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::ibetac_inv(in1[i], in2[i], in3[i]); + } +} + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +/** + * Host main routine + */ +int main(void) +{ + try{ + // Consolidate the test data: + std::vector v1, v2, v3; + + for(unsigned i = 0; i < ibeta_data.size(); ++i) + { + v1.push_back(ibeta_data[i][0]); + v2.push_back(ibeta_data[i][1]); + v3.push_back(ibeta_data[i][2]); + } + for(unsigned i = 0; i < ibeta_small_data.size(); ++i) + { + v1.push_back(ibeta_small_data[i][0]); + v2.push_back(ibeta_small_data[i][1]); + v3.push_back(ibeta_small_data[i][2]); + } + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr input_vector3(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + int table_id = i % 
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibetac_inv(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibetac_inv_float.cu b/test/test_ibetac_inv_float.cu
new file mode 100644
index 0000000000..94583b45e2
--- /dev/null
+++ b/test/test_ibetac_inv_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inv(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibetac_inv(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
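+        // Compare the GPU results against the serial Boost.Math values with
+        // a fairly loose 300 eps tolerance; device code may use different
+        // math intrinsics and rounding than the host build.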
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibetac_inv_nvrtc_double.cpp b/test/test_ibetac_inv_nvrtc_double.cpp
new file mode 100644
index 0000000000..a99d53b3cd
--- /dev/null
+++ b/test/test_ibetac_inv_nvrtc_double.cpp
@@ -0,0 +1,207 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibetac_inv_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inv(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_inv_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibetac_inv_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
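+        // NVRTC pipeline so far: kernel source string -> nvrtcProgram -> PTX.
+        // The driver API now JIT-loads that PTX and returns a CUfunction
+        // handle that can be launched like a precompiled kernel.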
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_inv_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ibetac_inv(h_in1[i], h_in2[i], h_in3[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibetac_inv_nvrtc_float.cpp b/test/test_ibetac_inv_nvrtc_float.cpp
new file mode 100644
index 0000000000..47e89db4c1
--- /dev/null
+++ b/test/test_ibetac_inv_nvrtc_float.cpp
@@ -0,0 +1,207 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibetac_inv_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inv(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_inv_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibetac_inv_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_inv_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
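+        // cuLaunchKernel takes the kernel parameters as an array of pointers
+        // to the arguments, so args holds the addresses of the device
+        // pointers and of numElements, not their values.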
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::ibetac_inv(h_in1[i], h_in2[i], h_in3[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibetac_inva_double.cu b/test/test_ibetac_inva_double.cu
new file mode 100644
index 0000000000..2efbee265d
--- /dev/null
+++ b/test/test_ibetac_inva_double.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inva(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
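+        // cuda_managed_ptr is a local test helper; it appears to hand out
+        // CUDA managed (unified) memory, so the same buffers can be filled
+        // and read by the host loops below and passed straight to the kernel.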
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibetac_inva(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibetac_inva_float.cu b/test/test_ibetac_inva_float.cu
new file mode 100644
index 0000000000..9bd1a29a07
--- /dev/null
+++ b/test/test_ibetac_inva_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inva(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibetac_inva(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibetac_inva_nvrtc_double.cpp b/test/test_ibetac_inva_nvrtc_double.cpp
new file mode 100644
index 0000000000..7c7bf992b3
--- /dev/null
+++ b/test/test_ibetac_inva_nvrtc_double.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
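+// These policy macros make Boost.Math signal overflow by returning a value
+// rather than throwing, and stop double results from being internally
+// promoted to long double, which CUDA device code does not support.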
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibetac_inva_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inva(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_inva_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibetac_inva_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_inva_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            // Sometimes the ignore error policy is ignored so the below throws
+            // Rather than terminating we can continue to process through our results array
+            double res;
+            try
+            {
+                res = boost::math::ibetac_inva(h_in1[i], h_in2[i], h_in3[i]);
+            }
+            catch (...)
+            {
+                continue;
+            }
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibetac_inva_nvrtc_float.cpp b/test/test_ibetac_inva_nvrtc_float.cpp
new file mode 100644
index 0000000000..c79b8b02f1
--- /dev/null
+++ b/test/test_ibetac_inva_nvrtc_float.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibetac_inva_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_inva(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_inva_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibetac_inva_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_inva_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            // Sometimes the ignore error policy is ignored so the below throws
+            // Rather than terminating we can continue to process through our results array
+            double res;
+            try
+            {
+                res = boost::math::ibetac_inva(h_in1[i], h_in2[i], h_in3[i]);
+            }
+            catch (...)
+            {
+                continue;
+            }
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibetac_invb_double.cu b/test/test_ibetac_invb_double.cu
new file mode 100644
index 0000000000..fddd655af2
--- /dev/null
+++ b/test/test_ibetac_invb_double.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_invb(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibetac_invb(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibetac_invb_float.cu b/test/test_ibetac_invb_float.cu
new file mode 100644
index 0000000000..fddd655af2
--- /dev/null
+++ b/test/test_ibetac_invb_float.cu
@@ -0,0 +1,149 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// floating-point value does not fit in required floating-point type
+#pragma nv_diag_suppress 221
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_invb(in1[i], in2[i], in3[i]);
+    }
+}
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Consolidate the test data:
+        std::vector<float_type> v1, v2, v3;
+
+        for(unsigned i = 0; i < ibeta_data.size(); ++i)
+        {
+            v1.push_back(ibeta_data[i][0]);
+            v2.push_back(ibeta_data[i][1]);
+            v3.push_back(ibeta_data[i][2]);
+        }
+        for(unsigned i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            v1.push_back(ibeta_small_data[i][0]);
+            v2.push_back(ibeta_small_data[i][1]);
+            v3.push_back(ibeta_small_data[i][2]);
+        }
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vectors
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+        cuda_managed_ptr<float_type> input_vector2(numElements);
+        cuda_managed_ptr<float_type> input_vector3(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        // Initialize the input vectors by cycling through the table data
+        for (int i = 0; i < numElements; ++i)
+        {
+            int table_id = i % v1.size();
+            input_vector1[i] = v1[table_id];
+            input_vector2[i] = v2[table_id];
+            input_vector3[i] = v3[table_id];
+        }
+
+        // Launch the test CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), input_vector3.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(boost::math::ibetac_invb(input_vector1[i], input_vector2[i], input_vector3[i]));
+        double t = w.elapsed();
+        bool failed = false;
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::isfinite(output_vector[i]))
+            {
+                if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300)
+                {
+                    std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                    std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                    failed = true;
+                }
+            }
+        }
+
+        if (failed)
+        {
+            return EXIT_FAILURE;
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
+
diff --git a/test/test_ibetac_invb_nvrtc_double.cpp b/test/test_ibetac_invb_nvrtc_double.cpp
new file mode 100644
index 0000000000..76f6318901
--- /dev/null
+++ b/test/test_ibetac_invb_nvrtc_double.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibetac_invb_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_invb(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_invb_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibetac_invb_kernel");
+
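+        // Under BOOST_MATH_NVRTC_CI_RUN the target architecture is pinned to
+        // compute_75 and the include path matches the CI checkout layout;
+        // the fallback include path below is developer-machine specific.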
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_invb_kernel"), "Failed to get kernel function");
+
+        int numElements = ibeta_data.size() + ibeta_small_data.size();
+        float_type *h_in1, *h_in2, *h_in3, *h_out;
+        float_type *d_in1, *d_in2, *d_in3, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_in3 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        for (int i = 0; i < ibeta_data.size(); ++i)
+        {
+            h_in1[i] = ibeta_data[i][0];
+            h_in2[i] = ibeta_data[i][1];
+            h_in3[i] = ibeta_data[i][2];
+        }
+        for (int i = 0; i < ibeta_small_data.size(); ++i)
+        {
+            h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0];
+            h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1];
+            h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2];
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in3");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+        checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            // Sometimes the ignore error policy is ignored so the below throws
+            // Rather than terminating we can continue to process through our results array
+            double res;
+            try
+            {
+                res = boost::math::ibetac_invb(h_in1[i], h_in2[i], h_in3[i]);
+            }
+            catch (...)
+            {
+                continue;
+            }
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_in3);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_in3;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_ibetac_invb_nvrtc_float.cpp b/test/test_ibetac_invb_nvrtc_float.cpp
new file mode 100644
index 0000000000..48d0a31eec
--- /dev/null
+++ b/test/test_ibetac_invb_nvrtc_float.cpp
@@ -0,0 +1,220 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/beta.hpp>
+extern "C" __global__
+void test_ibetac_invb_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::ibetac_invb(in1[i], in2[i], in3[i]);
+    }
+}
+)";
+
+template <class T> struct table_type { typedef T type; };
+typedef float_type T;
+#define SC_(x) static_cast<T>(x)
+
+#include "ibeta_data.ipp"
+#include "ibeta_small_data.ipp"
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_invb_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_ibetac_invb_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
"--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_invb_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + // Sometimes the ignore error policy is ignored so the below throws + // Rather than terminating we can continue to process through our results array + double res; + try + { + res = boost::math::ibetac_invb(h_in1[i], h_in2[i], h_in3[i]); + } + catch (...) 
+ { + continue; + } + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibetac_nvrtc_double.cpp b/test/test_ibetac_nvrtc_double.cpp new file mode 100644 index 0000000000..6a59473e18 --- /dev/null +++ b/test/test_ibetac_nvrtc_double.cpp @@ -0,0 +1,207 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_ibetac_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibetac(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibetac_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", 
"--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::ibetac(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + 
nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_ibetac_nvrtc_float.cpp b/test/test_ibetac_nvrtc_float.cpp new file mode 100644 index 0000000000..a989191e51 --- /dev/null +++ b/test/test_ibetac_nvrtc_float.cpp @@ -0,0 +1,207 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_ibetac_kernel(const float_type *in1, const float_type *in2, const float_type *in3, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::ibetac(in1[i], in2[i], in3[i]); + } +} +)"; + +template struct table_type { typedef T type; }; +typedef float_type T; +#define SC_(x) static_cast(x) + +#include "ibeta_data.ipp" +#include "ibeta_small_data.ipp" + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_ibetac_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_ibetac_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + 
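+        // NVRTC hands the generated PTX back in two steps: first query the buffer
+        // size (which includes the trailing NUL), then copy the PTX text out.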
size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_ibetac_kernel"), "Failed to get kernel function"); + + int numElements = ibeta_data.size() + ibeta_small_data.size(); + float_type *h_in1, *h_in2, *h_in3, *h_out; + float_type *d_in1, *d_in2, *d_in3, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_in3 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + for (int i = 0; i < ibeta_data.size(); ++i) + { + h_in1[i] = ibeta_data[i][0]; + h_in2[i] = ibeta_data[i][1]; + h_in3[i] = ibeta_data[i][2]; + } + for (int i = 0; i < ibeta_small_data.size(); ++i) + { + h_in1[i + ibeta_data.size()] = ibeta_small_data[i][0]; + h_in2[i + ibeta_data.size()] = ibeta_small_data[i][1]; + h_in3[i + ibeta_data.size()] = ibeta_small_data[i][2]; + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_in3, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + checkCUDAError(cudaMemcpy(d_in3, h_in3, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in3"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_in3, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::ibetac(h_in1[i], h_in2[i], h_in3[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_in3; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_igamma.cpp b/test/test_igamma.cpp index 8e80c772c4..6e034f3c60 100644 --- a/test/test_igamma.cpp +++ b/test/test_igamma.cpp @@ -3,7 +3,18 @@ // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#endif + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wliteral-range" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Woverflow" +#endif + #include "test_igamma.hpp" // diff --git a/test/test_igamma.hpp b/test/test_igamma.hpp index b434f727ee..bfe386d4de 100644 --- a/test/test_igamma.hpp +++ b/test/test_igamma.hpp @@ -8,11 +8,12 @@ #include #include +#include #define BOOST_TEST_MAIN #include #include +#include "../include_private/boost/math/tools/test.hpp" #include -#include #include #include #include diff --git a/test/test_igamma_inv.cpp b/test/test_igamma_inv.cpp index eafed0e1da..80a553427c 100644 --- a/test/test_igamma_inv.cpp +++ b/test/test_igamma_inv.cpp @@ -3,7 +3,18 @@ // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#endif + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wliteral-range" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Woverflow" +#endif +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error #include "test_igamma_inv.hpp" #if !defined(TEST_FLOAT) && !defined(TEST_DOUBLE) && !defined(TEST_LDOUBLE) && !defined(TEST_REAL_CONCEPT) @@ -89,14 +100,22 @@ void expected_results() "linux.*", // platform largest_type, // test type(s) "[^|]*medium[^|]*", // test data group + #ifdef SYCL_LANGUAGE_VERSION + "[^|]*", 350, 50); + #else "[^|]*", 350, 5); // test function + #endif add_expected_result( "[^|]*", // compiler "[^|]*", // stdlib "linux.*", // platform largest_type, // test type(s) "[^|]*large[^|]*", // test data group + #ifdef SYCL_LANGUAGE_VERSION + "[^|]*", 150, 20); // test function + #else "[^|]*", 150, 5); // test function + #endif // diff --git a/test/test_igamma_inv.hpp b/test/test_igamma_inv.hpp index 7330e918a7..cf481537e7 100644 --- a/test/test_igamma_inv.hpp +++ b/test/test_igamma_inv.hpp @@ -6,13 +6,14 @@ #include #include +#include #define BOOST_TEST_MAIN #include #include #include #include #include -#include +#include "../include_private/boost/math/tools/test.hpp" #include #include #include diff --git a/test/test_igamma_inva.cpp b/test/test_igamma_inva.cpp index 047df11735..443ad7bbc6 100644 --- a/test/test_igamma_inva.cpp +++ b/test/test_igamma_inva.cpp @@ -3,7 +3,18 @@ // Boost Software License, Version 1.0. 
(See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#endif + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wliteral-range" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Woverflow" +#endif + #include "test_igamma_inva.hpp" #if !defined(TEST_FLOAT) && !defined(TEST_DOUBLE) && !defined(TEST_LDOUBLE) && !defined(TEST_REAL_CONCEPT) diff --git a/test/test_igamma_inva.hpp b/test/test_igamma_inva.hpp index 402ea2f8bc..d9d317da15 100644 --- a/test/test_igamma_inva.hpp +++ b/test/test_igamma_inva.hpp @@ -8,13 +8,14 @@ #include #include +#include #define BOOST_TEST_MAIN #include #include #include #include #include -#include +#include "../include_private/boost/math/tools/test.hpp" #include #include #include diff --git a/test/test_inverse_chi_squared_cdf_double.cu b/test/test_inverse_chi_squared_cdf_double.cu new file mode 100644 index 0000000000..9703e7a3a0 --- /dev/null +++ b/test/test_inverse_chi_squared_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_chi_squared_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_chi_squared_cdf_float.cu b/test/test_inverse_chi_squared_cdf_float.cu new file mode 100644 index 0000000000..bb56a48728 --- /dev/null +++ b/test/test_inverse_chi_squared_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_chi_squared_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_chi_squared_cdf_nvrtc_double.cpp b/test/test_inverse_chi_squared_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..b221aedaab --- /dev/null +++ b/test/test_inverse_chi_squared_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_inverse_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", 
"-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::inverse_chi_squared_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_chi_squared_cdf_nvrtc_float.cpp b/test/test_inverse_chi_squared_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..743654c149 --- /dev/null +++ b/test/test_inverse_chi_squared_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_inverse_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + 
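+        // cuModuleLoadDataEx JIT-compiles the PTX for the device in the current
+        // context; the kernel is then looked up by its unmangled extern "C" name.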
checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::inverse_chi_squared_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_chi_squared_distribution.cpp b/test/test_inverse_chi_squared_distribution.cpp index a697824185..cbc9dcf191 100644 --- a/test/test_inverse_chi_squared_distribution.cpp +++ b/test/test_inverse_chi_squared_distribution.cpp @@ -14,11 +14,14 @@ // http://www.wolframalpha.com/input/?i=inverse+chisquare+distribution -#include +#include +#include "../include_private/boost/math/tools/test.hpp" + +#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS #include // for real_concept using ::boost::math::concepts::real_concept; +#endif -//#include #define BOOST_TEST_MAIN #include // for test_main #include // for BOOST_CHECK_CLOSE_FRACTION diff --git a/test/test_inverse_chi_squared_pdf_double.cu b/test/test_inverse_chi_squared_pdf_double.cu new file mode 100644 index 0000000000..f306117493 --- /dev/null +++ b/test/test_inverse_chi_squared_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. 
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::inverse_chi_squared_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vector
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::inverse_chi_squared_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results: tolerate up to 100 eps of device/host disagreement
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_chi_squared_pdf_float.cu b/test/test_inverse_chi_squared_pdf_float.cu
new file mode 100644
index 0000000000..8a3d1c1ef3
--- /dev/null
+++ b/test/test_inverse_chi_squared_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::inverse_chi_squared_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vector
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::inverse_chi_squared_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results: tolerate up to 100 eps of device/host disagreement
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_chi_squared_pdf_nvrtc_double.cpp b/test/test_inverse_chi_squared_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..4608b3bd62
--- /dev/null
+++ b/test/test_inverse_chi_squared_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_inverse_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::inverse_chi_squared_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_chi_squared_pdf_nvrtc_float.cpp b/test/test_inverse_chi_squared_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..8b4db55c0a --- /dev/null +++ b/test/test_inverse_chi_squared_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_inverse_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::inverse_chi_squared_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_chi_squared_quan_double.cu b/test/test_inverse_chi_squared_quan_double.cu new file mode 100644 index 0000000000..f9022c6a32 --- /dev/null +++ b/test/test_inverse_chi_squared_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_chi_squared_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vector
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::inverse_chi_squared_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results: tolerate up to 100 eps of device/host disagreement
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_chi_squared_quan_float.cu b/test/test_inverse_chi_squared_quan_float.cu
new file mode 100644
index 0000000000..10aa6d7075
--- /dev/null
+++ b/test/test_inverse_chi_squared_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
diff --git a/test/test_inverse_chi_squared_quan_float.cu b/test/test_inverse_chi_squared_quan_float.cu new file mode 100644 index 0000000000..10aa6d7075 --- /dev/null +++ b/test/test_inverse_chi_squared_quan_float.cu @@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_chi_squared_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch inverse_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::inverse_chi_squared_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
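The *_nvrtc_* translation units that follow exercise a different compilation path: instead of compiling the kernel offline with nvcc, they hand the kernel source (held in the cuda_kernel string) to NVRTC at run time, load the resulting PTX through the CUDA driver API, and look the kernel up by name. Condensed to its essentials, the round-trip every one of these tests repeats looks roughly like this (error handling and compile-log retrieval elided; assumes cuInit and cuCtxCreate have already run, as they do in the tests):

    #include <nvrtc.h>
    #include <cuda.h>
    #include <vector>

    // Source string -> PTX -> loaded CUfunction, found by extern "C" name.
    CUfunction compile_kernel(const char* source, const char* name)
    {
        nvrtcProgram prog;
        nvrtcCreateProgram(&prog, source, "kernel.cu", 0, nullptr, nullptr);

        const char* opts[] = {"--std=c++14"};  // plus -I paths, as in the tests
        nvrtcCompileProgram(prog, 1, opts);    // on failure, fetch the program log

        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        std::vector<char> ptx(ptx_size);
        nvrtcGetPTX(prog, ptx.data());
        nvrtcDestroyProgram(&prog);

        CUmodule module;
        CUfunction kernel;
        cuModuleLoadDataEx(&module, ptx.data(), 0, nullptr, nullptr);
        cuModuleGetFunction(&kernel, module, name);
        return kernel;
    }

The extern "C" on the kernel matters here: it keeps the symbol unmangled so cuModuleGetFunction can find it by the literal name string.
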
diff --git a/test/test_inverse_chi_squared_quan_nvrtc_double.cpp b/test/test_inverse_chi_squared_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..0f8a9a5f82 --- /dev/null +++ b/test/test_inverse_chi_squared_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_chi_squared.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstring>
+#include <cstdio>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/inverse_chi_squared.hpp>
+extern "C" __global__
+void test_inverse_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_chi_squared_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::inverse_chi_squared_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
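One driver-API detail worth calling out before the float twin of this test: cuLaunchKernel receives its kernel arguments as an array of pointers to the argument values, in declaration order, which is why even the unused const float_type* second parameter keeps a slot so that the later arguments line up. The same call as in the tests, annotated:

    void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
    cuLaunchKernel(kernel,
                   numBlocks, 1, 1,   // grid dimensions (x, y, z)
                   blockSize, 1, 1,   // block dimensions (x, y, z)
                   0,                 // dynamic shared memory, in bytes
                   0,                 // stream; 0 = default stream
                   args,              // kernel parameters
                   0);                // "extra" launch options, unused here

The launch itself is asynchronous; these tests rely on the subsequent cudaMemcpy back to the host, which synchronizes with the default stream before d_out is read.
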
diff --git a/test/test_inverse_chi_squared_quan_nvrtc_float.cpp b/test/test_inverse_chi_squared_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..ab494a8da3 --- /dev/null +++ b/test/test_inverse_chi_squared_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_inverse_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::inverse_chi_squared_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_chi_squared_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_chi_squared_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_chi_squared_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::inverse_chi_squared_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_gamma_cdf_double.cu b/test/test_inverse_gamma_cdf_double.cu new file mode 100644 index 0000000000..4368a2284b --- /dev/null +++ b/test/test_inverse_gamma_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_gamma distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_gamma_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_gamma_cdf_float.cu b/test/test_inverse_gamma_cdf_float.cu new file mode 100644 index 0000000000..cef2ec955f --- /dev/null +++ b/test/test_inverse_gamma_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_gamma distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::inverse_gamma_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_gamma_cdf_nvrtc_double.cpp b/test/test_inverse_gamma_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..c5a4b9878a --- /dev/null +++ b/test/test_inverse_gamma_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_inverse_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] 
= static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::inverse_gamma_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_gamma_cdf_nvrtc_float.cpp b/test/test_inverse_gamma_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..d76d512256 --- /dev/null +++ b/test/test_inverse_gamma_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_inverse_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::inverse_gamma_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_gamma_distribution.cpp b/test/test_inverse_gamma_distribution.cpp index 68b238fbc8..436131d83f 100644 --- a/test/test_inverse_gamma_distribution.cpp +++ b/test/test_inverse_gamma_distribution.cpp @@ -14,11 +14,14 @@ # pragma warning (disable : 4310) // cast truncates constant value #endif -#include +#include +#include "../include_private/boost/math/tools/test.hpp" + +#ifndef BOOST_MATH_HAS_GPU_SUPPORT #include // for real_concept using ::boost::math::concepts::real_concept; +#endif -//#include #define BOOST_TEST_MAIN #include // for test_main #include // for BOOST_CHECK_CLOSE_FRACTION diff --git a/test/test_inverse_gamma_pdf_double.cu b/test/test_inverse_gamma_pdf_double.cu new file mode 100644 index 0000000000..fa5073dbe0 --- /dev/null +++ b/test/test_inverse_gamma_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_gamma distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::inverse_gamma_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_gamma_pdf_float.cu b/test/test_inverse_gamma_pdf_float.cu new file mode 100644 index 0000000000..c2d80fe8d5 --- /dev/null +++ b/test/test_inverse_gamma_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_gamma distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::inverse_gamma_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_gamma_pdf_nvrtc_double.cpp b/test/test_inverse_gamma_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..db2c8c4e19 --- /dev/null +++ b/test/test_inverse_gamma_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_inverse_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] 
= static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::inverse_gamma_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_gamma_pdf_nvrtc_float.cpp b/test/test_inverse_gamma_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..4d552cf619 --- /dev/null +++ b/test/test_inverse_gamma_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_inverse_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::inverse_gamma_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_gamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::inverse_gamma_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_inverse_gamma_quan_double.cu b/test/test_inverse_gamma_quan_double.cu new file mode 100644 index 0000000000..c9095d7527 --- /dev/null +++ b/test/test_inverse_gamma_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::inverse_gamma_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_gamma distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::inverse_gamma_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_gamma_quan_float.cu b/test/test_inverse_gamma_quan_float.cu new file mode 100644 index 0000000000..3e60feaa18 --- /dev/null +++ b/test/test_inverse_gamma_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::inverse_gamma_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch inverse_gamma distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::inverse_gamma_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_inverse_gamma_quan_nvrtc_double.cpp b/test/test_inverse_gamma_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..a49600bde1 --- /dev/null +++ b/test/test_inverse_gamma_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/inverse_gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_gamma_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::inverse_gamma_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gamma_quan_nvrtc_float.cpp b/test/test_inverse_gamma_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..f71ed964aa
--- /dev/null
+++ b/test/test_inverse_gamma_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/inverse_gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_gamma_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::inverse_gamma_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gaussian.cpp b/test/test_inverse_gaussian.cpp
index 68012d48a2..3825da9397 100644
--- a/test/test_inverse_gaussian.cpp
+++ b/test/test_inverse_gaussian.cpp
@@ -16,8 +16,13 @@
 //#include <pch.hpp> // include directory libs/math/src/tr1/ is needed.
 
-#include <boost/math/tools/test.hpp>
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
@@ -26,7 +31,6 @@
 using boost::math::inverse_gaussian_distribution;
 using boost::math::inverse_gaussian;
 
-#include <boost/math/tools/test.hpp>
 #include "test_out_of_range.hpp"
 #include <iostream>
diff --git a/test/test_inverse_gaussian_cdf_double.cu b/test/test_inverse_gaussian_cdf_double.cu
new file mode 100644
index 0000000000..3224ff527b
--- /dev/null
+++ b/test/test_inverse_gaussian_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_gaussian distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::inverse_gaussian_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_gaussian_cdf_float.cu b/test/test_inverse_gaussian_cdf_float.cu
new file mode 100644
index 0000000000..e2abb72dd1
--- /dev/null
+++ b/test/test_inverse_gaussian_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_gaussian distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::inverse_gaussian_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_gaussian_cdf_nvrtc_double.cpp b/test/test_inverse_gaussian_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..1d677010fe
--- /dev/null
+++ b/test/test_inverse_gaussian_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gaussian_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gaussian_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gaussian_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gaussian_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::inverse_gaussian_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gaussian_cdf_nvrtc_float.cpp b/test/test_inverse_gaussian_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..87e1537b89
--- /dev/null
+++ b/test/test_inverse_gaussian_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gaussian_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gaussian_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gaussian_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gaussian_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::inverse_gaussian_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gaussian_pdf_double.cu b/test/test_inverse_gaussian_pdf_double.cu
new file mode 100644
index 0000000000..7f9128037b
--- /dev/null
+++ b/test/test_inverse_gaussian_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_gaussian distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::inverse_gaussian_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_gaussian_pdf_float.cu b/test/test_inverse_gaussian_pdf_float.cu
new file mode 100644
index 0000000000..3795ff2dbb
--- /dev/null
+++ b/test/test_inverse_gaussian_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_gaussian distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::inverse_gaussian_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_gaussian_pdf_nvrtc_double.cpp b/test/test_inverse_gaussian_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..e0f87de0ab
--- /dev/null
+++ b/test/test_inverse_gaussian_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gaussian_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gaussian_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gaussian_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gaussian_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::inverse_gaussian_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gaussian_pdf_nvrtc_float.cpp b/test/test_inverse_gaussian_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..eb124deda9
--- /dev/null
+++ b/test/test_inverse_gaussian_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gaussian_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gaussian_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gaussian_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gaussian_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::inverse_gaussian_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gaussian_quan_double.cu b/test/test_inverse_gaussian_quan_double.cu
new file mode 100644
index 0000000000..2727e53f49
--- /dev/null
+++ b/test/test_inverse_gaussian_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_gaussian distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::inverse_gaussian_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_gaussian_quan_float.cu b/test/test_inverse_gaussian_quan_float.cu
new file mode 100644
index 0000000000..2727e53f49
--- /dev/null
+++ b/test/test_inverse_gaussian_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch inverse_gaussian distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::inverse_gaussian_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_inverse_gaussian_quan_nvrtc_double.cpp b/test/test_inverse_gaussian_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..a72aab4be4
--- /dev/null
+++ b/test/test_inverse_gaussian_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/inverse_gaussian.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_inverse_gaussian_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::inverse_gaussian_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gaussian_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_inverse_gaussian_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gaussian_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::inverse_gaussian_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_inverse_gaussian_quan_nvrtc_float.cpp b/test/test_inverse_gaussian_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..550393f7e1
--- /dev/null
+++ b/test/test_inverse_gaussian_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_inverse_gaussian_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::inverse_gaussian_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_inverse_gaussian_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_inverse_gaussian_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_inverse_gaussian_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; 
++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::inverse_gaussian_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_jacobi_zeta.cpp b/test/test_jacobi_zeta.cpp index 77f33efb1e..c64f99580e 100644 --- a/test/test_jacobi_zeta.cpp +++ b/test/test_jacobi_zeta.cpp @@ -4,7 +4,10 @@ // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include +#endif + #include "test_jacobi_zeta.hpp" // diff --git a/test/test_jacobi_zeta.hpp b/test/test_jacobi_zeta.hpp index 1aa72feb0d..a39d3ba709 100644 --- a/test/test_jacobi_zeta.hpp +++ b/test/test_jacobi_zeta.hpp @@ -8,11 +8,17 @@ // Constants are too big for float case, but this doesn't matter for test. #endif +#include + +#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS #include +#endif + #define BOOST_TEST_MAIN #include #include #include +#include #include //#include #include diff --git a/test/test_jacobi_zeta_double.cu b/test/test_jacobi_zeta_double.cu new file mode 100644 index 0000000000..8594da140b --- /dev/null +++ b/test/test_jacobi_zeta_double.cu @@ -0,0 +1,120 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/jacobi_zeta.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::jacobi_zeta(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::jacobi_zeta(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    int fail_counter = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 200)
+            {
+                std::cerr << "Result verification failed at element " << i << "!\n"
+                          << "Device: " << output_vector[i] << '\n'
+                          << "  Host: " << results[i] << '\n'
+                          << "   Eps: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                fail_counter++;
+                if (fail_counter > 100)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    if (fail_counter > 0)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_jacobi_zeta_float.cu b/test/test_jacobi_zeta_float.cu
new file mode 100644
index 0000000000..7b473455ad
--- /dev/null
+++ b/test/test_jacobi_zeta_float.cu
@@ -0,0 +1,120 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::jacobi_zeta(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::jacobi_zeta(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + // check the results + int fail_counter = 0; + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 200) + { + std::cerr << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i] << '\n' + << " Host: " << results[i] << '\n' + << " Eps: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl; + fail_counter++; + if (fail_counter > 100) + { + break; + } + } + } + } + + if (fail_counter > 0) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_jacobi_zeta_nvrtc_double.cpp b/test/test_jacobi_zeta_nvrtc_double.cpp new file mode 100644 index 0000000000..ded2e66571 --- /dev/null +++ b/test/test_jacobi_zeta_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_jacobi_zeta_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::jacobi_zeta(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_jacobi_zeta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_jacobi_zeta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_jacobi_zeta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::jacobi_zeta(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_jacobi_zeta_nvrtc_float.cpp b/test/test_jacobi_zeta_nvrtc_float.cpp new file mode 100644 index 0000000000..de52da118d --- /dev/null +++ b/test/test_jacobi_zeta_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_jacobi_zeta_kernel(const float_type *in1, const float_type* in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::jacobi_zeta(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_jacobi_zeta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_jacobi_zeta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_jacobi_zeta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::jacobi_zeta(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_landau.cpp b/test/test_landau.cpp
new file mode 100644
index 0000000000..c69c208177
--- /dev/null
+++ b/test/test_landau.cpp
@@ -0,0 +1,873 @@
+// Copyright Takuma Yoshimura 2024.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_TEST_MAIN
+#define BOOST_TEST_MODULE StatsLandauTest
+#include <boost/math/tools/config.hpp>
+#include <boost/test/included/unit_test.hpp>
+#include <boost/test/tools/floating_point_comparison.hpp>
+#include <boost/math/distributions/landau.hpp>
+
+#if __has_include(<stdfloat>)
+# include <stdfloat>
+#endif
+
+#ifdef __clang__
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
+using boost::math::landau_distribution;
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+# include <boost/multiprecision/cpp_bin_float.hpp>
+  using boost::multiprecision::cpp_bin_float_quad;
+#endif
+
+template <class RealType, int N>
+void do_test_landau_pdf() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    landau_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    // Left tail of Landau distribution inherently limits accuracy due to the rapid decay of the function value.
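+    // Note on the scaled tolerances below: the pdf falls through thousands of
+    // orders of magnitude across the left tail (down to ~2.2e-2763 at x = -6.5),
+    // so the base tolerance is widened by multipliers (10000, 4000, 1000, 400,
+    // 100, 40, 10, 4) in step with the loss of attainable relative accuracy;
+    // from x = -2 onwards the plain 3-epsilon tolerance applies.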
+ BOOST_CHECK_CLOSE(pdf(dist, static_cast(-6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.23085262843901249987787600097675204323741277288698e-2763), tolerance * 10000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43092685592935588160074717060232841559786193515624e-1259), tolerance * 10000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.07984079126853424748872672315932614660166330244758e-574), tolerance * 4000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51902330649665702862232056199116883579782363388698e-261), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.61740160083645125413680224234682096739098348513281e-119), tolerance * 400); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.40268429420551752841762522647338593851384890728514e-54), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.64264014182893964260735316412669280242258553581563e-37), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.96628298286997713279446280099737948363623849272499e-25), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.65014695258589331535696024118503069088391324558285e-17), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52577680004870416264378840612037346351517011876564e-11), tolerance * 40); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.96564073685328676511751663666044321538322085645615e-8), tolerance * 40); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.49195679110887105000100573294850045215882388658736e-5), tolerance * 10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.83029281547111049679644135145824504175531296639784e-4), tolerance * 4); + + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.50763682207511020788551990942118742909694977020766e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55239934591810979484111502955882518822410677336301e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.11811339800999276275816189148400161297598608940370e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.43803188361205250108898134179433045737003842752098e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.44559645555354621908225839934967437642938589048641e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.19256762240252210606670736977933945194758525070962e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55747609478534232829752995639617852824070107389867e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90787204474682320276094536207112207798402106183626e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.21762208692280384264052188465103527015057255360522e-1), tolerance); 
+ BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.35142856681038695962612277740850797545496160091346e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46926317225475336949604234617889242016284403135263e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.57038182663311646092167022800387128596042855954974e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65453539644067140109079490907422208395106068408806e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72189928839617935958276513066615896148477769341984e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.77299781354951097794140321722510611592577477809046e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.80862887196928314574763226767834515262714169253341e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.82979296472337068393182796635835781531645972141685e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83762914935252825962037404558307269544013453409236e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83335939453844198706047097920194738266342072083183e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.81824188244607845783889480215304948211453087161311e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79353314321623987234651193548815637401927993615201e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76045845604337224786926223067521339118457783620200e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72018967730828877761590993107231423707402335516208e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.67382951900254916684467526412940597211696659408252e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.62240126375351657025589608183516471314881294729647e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56684293488331550048284390907274219670798777890274e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.50800501583089077855151395221936193088464543391697e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.44665091350810416315066697862911894015423062289459e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.38345947054726504652084392231776081590084152956389e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.31902894209756674501863785530002142859374765415539e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25388195748200100617512573536171938322886836672500e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.4375)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18847108191054606649972320910802001046885937665349e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.12318467675064265241298421202132871385510191955048e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.05835282809935995378486652717710487064456876565464e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.99425317296731752992133425091298094257535743484505e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.93111650117890312500196499480137441707124661561802e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.86913205029713491869481191910148340512494429155677e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.80845244177744590342641324845464181512725512270901e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.74919823041494123736512984399481253444800810061398e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.69146205716679200390505774997534008647397580964682e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.63531240868022603475813051802104652763421266792797e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52794577355035866144175272867439043108097332289280e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.42728268648990888425150769069939371653574963683898e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.33330183140387812967271143318337448709293578674499e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.24583903791438503777896988292725453902854934482082e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16463504238864475787300731138600083506835647657585e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08937118376337417303260299595016407050621597377842e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.01969562586719050610986409853288944869103385155807e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.55242261334771588093967856464157010583515904813711e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.40542106335080455284302185631726193959498759868958e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.42466467030240081155174776845818529710340047591917e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.58484837369864336802514847217316987349368653299870e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.86394883380361824062302169684513676586056038951383e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.24319810137134739489567689474925534356381531907667e-2), 
tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.70679858898932226345344507218372775665959269901845e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.24154519527191616123631238437619039275267092832219e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.83643820409470770350079809236512802617514831283958e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.17158939997042257734107084716347218188987519860810e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65588931293442756274955123891841674957134577995432e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.24999943998132461512162382109845468953569057653206e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.92615450486233381461674333853015589016995103903169e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.66449631910567440889641030798080985765446733145615e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.45062281189987454243698017666196791149621836621069e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.27394302531621603387264220385271200998719076373774e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12656323880287532946687856443190592954884366662750e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(9)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.97272209098442387115881795542777687494895710641442e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(10)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.29822142754406367527698808542596968321101482611499e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.04211802337476854086853204096220197132228559528499e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.07803739765703855539464392586871287241424478552475e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.32333875346680119671101261202737313738117089411627e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.72232982334134875374745579629692504175963269123550e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.23647379895109194018623503694593516089449736564227e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83847488747490686627461184914507143000166295798209e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.23229209720541068914767304360845920096605060550204e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.79947182256974676114073539538738666749318597550126e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48016777321149926163568000853843271440982208135806e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
1.23815521917416110142990010737370397993296057042020e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05051356188948975788809310742705170449199446295996e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.02189494175551818802120780043908400302499693753383e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.82977743390109357722237932712596740553146168838163e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.85767880395157523314894776472286059373130513345678e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.38436988071020010518465973524617926941561115862543e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.33733686912592492663864652100824543220296291087705e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.56719760886187032655497099894304037738414747694058e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98455504950479524812911915761096343833926775180855e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.53330379508737569512814290970113441709685617438320e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.17681386666988585981678825113751906947724221061916e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.89036019000414403525553377301848537121144402020797e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65677119200426097313111689557286038389044055520291e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.30262785101049126879812527584486523431435416855092e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05075497824543686508583330151701088746046662556142e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.65309719784826735811404845249006631571011107397937e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.24862987881373408810690419771199915049078409763957e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.15969937273460908511004283053502890256890552857434e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.29851658065131948803621145305770537695198696080417e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.60580149897015438520712002341005415255634795262650e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.04036158768936643116910162586744782570492472991852e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.94016243990046849166189460556288227597025594512208e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(512)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46086028677885635430916943188657821915692719236659e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1024)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 6.11684517285572531637079275580373275248746393740505e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52416040197318095218142515144159361915369773070323e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.80328507092562833387828627033077037362079287147494e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.49830894979328205589801770626380874036837651152185e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37321260457507890170942420030228551149027146711858e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.93116689130330056188281287044879751157310863062584e-10), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48253875064413349633513149673423379659751420343007e-10), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 17)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.70600576035028939688935887999924497768090063213495e-11), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.26455685393631638684357809746815117207931914505548e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 19)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.31607812424720075650435345952742430841572532807506e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.79011407780867471284567547634162699267979186606188e-13), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 21)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.44751775633854916204285667335984781835956253195982e-13), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.61878017561309094548695350627490496501579775589554e-14), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 23)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.04693171826032841875566733962302122545285900020005e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.26173047049833308211942124367857471386868403241961e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.65432295369693602811408648241147072923809751085578e-16), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41358031701599971473805281134574751063094181933632e-16), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 27)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.53395024254254681824332124678431347548990493315457e-17), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.83487488981301080647710951024578189621167614209000e-18), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 29)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20871862925450931967259224559170596670053211110073e-18), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.52179645209930996243372289840247886946662593536877e-19), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 31)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
1.38044909732789001695763980339161333087125784369381e-19), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.45112272298940668060590155421055597940788628104792e-20), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 34)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15695169189532579197171519055168026495351913764385e-21), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34809480577241011630081955001585893790541249979882e-22), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 38)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.42559253331811413189334788330851699569177453819777e-24), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.26599533286729634727335420905860617977183492898697e-25), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 42)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.29124708296676552183969416435576047604811850887072e-26), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.05702942684184448841776719264034349959339461930208e-27), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 46)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28564339177412106630199552439532586497065103592483e-28), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.03527119858493090854162194048072858076706076597819e-30), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 50)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.02204449911503854915599875932648541933495423396290e-31), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.13877781194681051697029360385733987849026091896815e-32), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 54)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.96173613246674215642318845771797429014235991071697e-33), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22608508279171150505713607335390403334626262247470e-34), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 58)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.66303176744819310534466860343470655112837733629379e-36), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.78939485465512007489594618239133015330653514470962e-37), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 62)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.99337178415944994713157754959412500112801769504923e-38), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.87085736509965620084544629248142254049400680133870e-39), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 68)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.30803658242053201409813080005840450554429514097447e-42), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.85470179000802031747569733233640023770585023325816e-44), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 76)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.11511788672188293650018950543585978468059122696737e-46), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.35592924500735522070031636324180548468184125270321e-49), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 84)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70153486133099813308596977841944913997064236009234e-51), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.64662055207421145736704601944518511418678161667227e-54), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 92)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.59633615315398885053400175162079403845778513781261e-56), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.01419380982577689473984441890730791873276374028644e-58), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 100)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.96169456963194099507751725745105477863201113021124e-61), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.54753694126247695120215517859242605013890436588657e-63), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 108)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.04506617680655059063341866635141218965712131228669e-66), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.36135397531505882446617916654287979923104956976029e-68), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 116)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.22403896607444853307101236930796194301091622511538e-71), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.60314022112283145823086420676091852871339922342198e-73), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 124)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40747664887610603837143133076598369654786648237564e-75), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.49795565967228921238840363580462378845964351238859e-78), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 136)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.38921456859174989683289128998508268228293005240118e-83), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 144)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28009255502193449353529225005875895420403010118164e-87), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 152)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95326622775563734975477943429376061127322830738396e-92), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 160)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98044773522283531151547154891015718272892496044375e-97), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 168)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.54780233035710954515910575700402402149799340844629e-102), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 176)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.93939564568650748467881127472537845077208466864701e-107), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 184)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05886774378761405711041431804281287395814280222275e-111), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 192)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61570395475404976976076403509950694878867004733696e-116), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 200)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46536858330390895044061895004197227293193061422265e-121), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 208)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.76185391739488060064791709906306804341420076633095e-126), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 216)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.74013354094677826026598678445902716585418818104698e-131), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 224)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.75874868918880960123594174874729486977262600867765e-136), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 232)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.33647898699780419940733974437672346035348907603114e-140), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 240)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.03930509490631744294332846737170938164289714970571e-145), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 248)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.11173262772570410605366282252763272345412773087419e-150), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.74812717853653580635629703144475208046589314403411e-155), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_landau_cdf() {
+    //
+    // Basic sanity checks; tolerance is 3 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    landau_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    // The left tail of the Landau distribution inherently limits accuracy, due to the rapid decay of the function value.
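+    // Empirically, the reference values below fall off doubly exponentially
+    // (ln(cdf(x)) is close to -(2/(pi*e)) * exp(-pi*x/2) for x << 0), so even a
+    // one-ulp perturbation of x moves cdf(x) by many orders of magnitude; the
+    // tolerance multipliers are scaled up accordingly as x decreases.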
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.23104384186197610398535377648417544130503923472088e-2767), tolerance * 10000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.13839831060468958638389597955042905895011279005609e-1263), tolerance * 10000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.44312475898376675389279659203392726913179988221856e-577), tolerance * 4000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.60161744250590449897633679409320688863793657438650e-264), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.35715172357835600843326068305277508896293574359993e-122), tolerance * 400);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21485044777822987831938208827397125595048645106450e-56), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.20700596052912975894895710721207297741913643002012e-39), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.68977785355043338853968150291349097131276031028880e-27), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08280503661515358192902976596786876511904900370188e-18), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.65792002575428625860974216754165889856915439054540e-13), tolerance * 40);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.10050102765608360408879765554028186586411956977646e-9), tolerance * 40);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.68815894016077213783105942021589578300879813020189e-7), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.37625644367407768681214121219960213300943983402353e-5), tolerance * 4);
+
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.07114056489178077422539043012078031612755414599729e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.02378089289225642153717146519785629105551593533471e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.86628849116890981757518380028315283124563329691336e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.01356086445894450901462536543096095003540865562528e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.87500433294484935737405475465845379363381335629325e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.14477362642888337899595114315256777323730741351157e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.86347714259952478386139532766899266845283140865196e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.03233833485353985646171581359634291113172343250638e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.61609610406317335842332400044553397267403259400925e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10447280320452875363390566033897366825208362273976e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.25520504813750944299743577454809066282274647597463e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41278210604233003854999957016294448201095585229262e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.57614904197281705412681135912362432813551089393050e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.74424900339390561779123457277662541525802871208804e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.91604740159711318114657552509684829710278507526176e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.09055135109096066379824832581601920047332653592899e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.26682451996131584587603100587452166035163093744954e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.44399775009966844789856291613140149240683895719411e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.62127593214618528740100724985588598153581542828585e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79794168130771479096847395825066083537476191027208e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.97335637253762835584202042505471763077481208909960e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.14695907126043189562824247063432944143645211079990e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.31826385110143246791390839018674857130873325623297e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.48685593270949525320895832911820568633181874504622e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.65238701512374797371944390202236332910560491002867e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.81457010849525523978613828303353744448250289768502e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.97317411782647886281236276761522493696113175680302e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.12801837383425412356107316013533713934091886254748e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.27896726012877622151287254340779424837571495657384e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.42592504593813016256394437035333320729928769135529e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.56883100040026405468976154826963286030986607276898e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.70765483748602800807348700535078666283426208025338e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.84239251923251354944511010208659148773285992261636e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.97306242841106189982720542754596269997825882200963e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.09970190928358551894563742001208165448751595738393e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.22236416601074576846725329522076603667792610069772e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.34111550192882688466795267571966376590108144590146e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.45603287874932276581779533145259002587664864932810e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.56720177227709322443036736236667681466823855576748e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.67471430008752600464561254217339060305933972234066e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.77866759641952347636729485475686950346612605081233e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.97630190201768046886571833364686445094779890812683e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.16093363694296970300414411298982724729800950998126e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.33340119589825831941744118804669746898228023705595e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.49453081078357569536385447775302397639174227456126e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.64512181956668895495628599815612188484872430606076e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.78593712196194817010501161391740217697424135219932e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.91769748027973587988050487247823119315646010788285e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.04107862044208783622223577234526499721208130809085e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.26517695571963502030771662018373539384937999898516e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.46273425405663094259919090350425049446731674380517e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.63758376080325935199241149369372014196158210794207e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.79296673358868394263845167623854238605663977145579e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.93161494811251760796024569455411548871678150466575e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.05582884526567009703468807923041602465523081207835e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.16754715036218405381848078882559199991501527210887e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.26840681332434061224397365001110201431714574161004e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.44288421192372625436336887550283154670590764164969e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.58804227080862094903927728732613232538756639404536e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.71029700052871041881284261613115047981162112317573e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.81440531622793612780308998937907666985394243647458e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.90394591759649482553720310181182407678855125658003e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.98164927634689821536641290454567745500263396394663e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.04962661826577677675750333226666504803112871241155e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.10953085258825170758957718650744990707007748273190e-1), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_landau_ccdf() {
+    //
+    // Basic sanity checks; tolerance is 3 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    landau_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.99292885943510821922577460956987921968387244585400e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.97976219107107743578462828534802143708944484064665e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.95133711508831090182424816199716847168754366703087e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.89864391355410554909853746345690390499645913443747e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.81249956670551506426259452453415462063661866437068e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.68552263735711166210040488568474322267626925864884e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.51365228574004752161386046723310073315471685913480e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.29676616651464601435382841864036570888682765674936e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.03839038959368266415766759995544660273259674059908e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.89552719679547124636609433966102633174791637726024e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.74479495186249055700256422545190933717725352402537e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.58721789395766996145000042983705551798904414770738e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.42385095802718294587318864087637567186448910606950e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.25575099660609438220876542722337458474197128791196e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.08395259840288681885342447490315170289721492473824e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.90944864890903933620175167418398079952667346407101e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.73317548003868415412396899412547833964836906255046e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.55600224990033155210143708386859850759316104280589e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.37872406785381471259899275014411401846418457171415e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.20205831869228520903152604174933916462523808972792e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.02664362746237164415797957494528236922518791090040e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.85304092873956810437175752936567055856354788920010e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.68173614889856753208609160981325142869126674376703e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.51314406729050474679104167088179431366818125495378e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.34761298487625202628055609797763667089439508997133e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.18542989150474476021386171696646255551749710231498e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.02682588217352113718763723238477506303886824319698e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.87198162616574587643892683986466286065908113745252e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.72103273987122377848712745659220575162428504342616e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.57407495406186983743605562964666679270071230864471e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.43116899959973594531023845173036713969013392723102e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.29234516251397199192651299464921333716573791974662e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.15760748076748645055488989791340851226714007738364e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.02693757158893810017279457245403730002174117799037e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.90029809071641448105436257998791834551248404261607e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.77763583398925423153274670477923396332207389930228e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.65888449807117311533204732428033623409891855409854e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.54396712125067723418220466854740997412335135067190e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.43279822772290677556963263763332318533176144423252e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.32528569991247399535438745782660939694066027765934e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22133240358047652363270514524313049653387394918767e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.02369809798231953113428166635313554905220109187317e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.83906636305703029699585588701017275270199049001874e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.66659880410174168058255881195330253101771976294405e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.50546918921642430463614552224697602360825772543874e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.35487818043331104504371400184387811515127569393924e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.21406287803805182989498838608259782302575864780068e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.08230251972026412011949512752176880684353989211715e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.95892137955791216377776422765473500278791869190915e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.73482304428036497969228337981626460615062000101484e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.53726574594336905740080909649574950553268325619483e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.36241623919674064800758850630627985803841789205793e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20703326641131605736154832376145761394336022854421e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.06838505188748239203975430544588451128321849533425e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.94417115473432990296531192076958397534476918792165e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.83245284963781594618151921117440800008498472789113e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73159318667565938775602634998889798568285425838996e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(4.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55711578807627374563663112449716845329409235835031e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41195772919137905096072271267386767461243360595464e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(5.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28970299947128958118715738386884952018837887682427e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.18559468377206387219691001062092333014605756352542e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(6.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.09605408240350517446279689818817592321144874341997e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.01835072365310178463358709545432254499736603605337e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(7.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.50373381734223223242496667733334951968871287588448e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.90469147411748292410422813492550092929922517268105e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(9))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.89904178519375419646729692713027452766956304810423e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.08967063825624536726393015854367927398248532043928e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.42552960269528172637745151674852945361026757129572e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.87155991603325430279966781361705985837149401924358e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.40297179699419503759356245733027728020687579530781e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.00178775061898975362196226343808042189550214610720e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.65467961100606469227978737829290437667112287748795e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.35157264931262089761621934621402648953813567255367e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.84807181681993477724365847791930879377149510380933e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.44719141256297825765636022076703604777398281522436e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.12076533048217442252783357779404422496811288961954e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.84999992100002427148546326508663805148388380497970e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.62189540632143171631461472152513248987041904718627e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42718378313424593748302632910098340183096121887744e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25908549106503559691958010353777133320615316433912e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.11253031965493064317003259449214452744697000964364e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.86945568960396455137037115431688883900622129241921e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.67614509428664592918239573763993132586708338097148e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51880213758692367036733937634308433186738240289411e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38828350661980875444222979837389265249992909781147e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.27829420024685114258602774435891775801837844690349e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.18436085060190590919501220038943005140750783789755e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10321854418155548568175979136702978995567636390268e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03242932255189471157625857327944947615628400066886e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.14901073563432515854150573798153430678589620957278e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.21304799594562916903235283213953478595851140543801e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.45021033455396813779806884621309261695988381836678e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.81662081970375351344432645398857600988792278643367e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.28205524656921301117582740567146809114392866561466e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.82502022536734166688286248837161828142031676980706e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.42981578145699797251974720633677573816663490439276e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.08470827750959570805565668839133674990776529552199e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51875926400324844459822808369891291211052352126700e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(512))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.25243798184186193073508388843632555296804735961418e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1024))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.24222666597149382309278694960823422316591681354937e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.11546656864925721385400506911714668228522959074988e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55615661646764419866569094853675423840407090817980e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.77642694561457848096607089076550607777923417509897e-5), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.88702045347145575328162812652239839108265167027629e-5), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.94318591305792177162247519775866592794374293269769e-5), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.71505353250968890581644704744607604904036466331858e-6), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 17))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.85729142998688039060495209150963975787147384530557e-6), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42858279647060361837158514176420066606677711840386e-6), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 19))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21427464712490151175367196840586966659512986509712e-6), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.07132880360935675845351982773327267915998066915668e-7), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 21))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.03565265516245957916479321487698564090018898313690e-7), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51782323124974173858669998521646771346131838415381e-7), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 23))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.58910801622650317351735034158503382297981706223572e-8), tolerance);
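+    // Empirically, the complemented CDF has settled onto its right-tail
+    // asymptote (2/pi) / x by this point: each doubling of x halves the
+    // tail mass (e.g. at x = 2^20 the value 6.07132880e-7 matches
+    // (2/pi) * 2^-20 to four significant figures).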
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.79455187330704471571840926063516392410676855026673e-8), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.89727537800148613712401816975344760624742045931026e-8), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.48637543100068173667331269634864926724205314304352e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 27))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.74318733515442703770226252397963326193722512832281e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37159356859216986241429458639296184020669300874754e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 29))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.18579675857518154364748998529259019922036982534955e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.92898372613704146131858586390099325449659727514527e-10), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 31))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.96449184577465200097456706940455493448424599625049e-10), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48224591841157074706460083370955443589105978681308e-10), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 34))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.70561478725616554450782995120929832467409189826579e-11), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.26403696230051280181276990208319690098486010067856e-12), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 38))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.31600924018782655652761250713319794226217154394605e-12), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.79002310021356047142575775154740548981748412961642e-13), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 42))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.44750577503651834833347404492486283164453776697661e-13), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.61876443758020639017543150875739203546579850550506e-14), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 46))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.04691109394324465958560412889011979187196424398413e-15), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.26172777348533543325299108739056247374472601114618e-15), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 50))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.65431943371302795435372191153510741742082062236667e-16), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41357985842823674325840669958475387008031322630868e-16), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 54))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.53394964607057868542015717421467433254478008250729e-17), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.83487411517643815597510900540151314745726776276537e-18), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 58))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20871852879410898385647064972035961074675685323943e-18), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.52179632198527209999982790078399565586115610945978e-19), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 62))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38044908049631800172983942388255378445601347506470e-19), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.45112270124079498928544180507695627740193986743475e-20), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 68))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15695168827549686524997208584894563633442321098092e-21), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34809480517218554065439979557746061380338130338956e-22), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 76))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.42559253232615962903750225978472507493251963237152e-24), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.26599533270384976814627294052855716278192786930198e-25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 84))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.29124708293990610509133147452548575130220036516594e-26), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.05702942683744131568207851461222366485608388136168e-27), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 92))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28564339177340082230129892190826718627845694793237e-28), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.03527119858375513938311820075537974687844953537787e-30), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 100))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.02204449911484696211444887297771791655738720297691e-31), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.13877781194677935132153054550953911471083914854259e-32), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 108))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.96173613246673709457595659093933570122399892106597e-33), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22608508279171068410997286933691737981629848926873e-34), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 116))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.66303176744819177568733043335566577797931906017599e-36), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.78939485465511985980458152084728836560995745045025e-37), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 124))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.99337178415944991237786345052955511752869068868957e-38), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.87085736509965619523616465658097194397480643905015e-39), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 136))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.30803658242053201264126818976942165494933536744296e-42), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 144))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.85470179000802031743799538662868033396263440343802e-44), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 152))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.11511788672188293649921694790182825545415091296783e-46), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 160))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.35592924500735522070006620274151662286777695301906e-49), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 168))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70153486133099813308596336044590493080772537219152e-51), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 176))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.64662055207421145736704437674181613596767723512183e-54), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 184))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.59633615315398885053400170966477192811237391996946e-56), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 192))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.01419380982577689473984441783780153441889606248807e-58), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 200))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.96169456963194099507751725717891224382381274409403e-61), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 208))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.54753694126247695120215517858551259524367685316173e-63), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 216))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.04506617680655059063341866634965857517061270766301e-66), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 224))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.36135397531505882446617916654283538092602058893086e-68), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 232))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.22403896607444853307101236930795070674226792551118e-71), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 240))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.60314022112283145823086420676091824482119840840280e-73), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 248))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40747664887610603837143133076598368938328062828235e-75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.49795565967228921238840363580462378665343995422791e-78), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_landau_quantile_nearzero() {
+    //
+    // Basic sanity checks; tolerance is 3 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
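+    // Note that BOOST_CHECK_CLOSE interprets its tolerance as a percentage,
+    // so for double (epsilon ~ 2.22e-16) the figure printed above is ~6.7e-14.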
+    landau_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.37666142124664870427736212608782014348092232942703e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.16727296241754547290632950718657110865371011883488e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.01093317611810417245689707499918702758654056685650e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.77109518013577849065583862782160134594408514905936e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.55147864363163697663604936395052484592948168261254e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.39830737061008092918841929339491480637487863539219e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.28068260002115642528019835718799250663117082682413e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.17764764050720242897742634974454174978089714315837e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.07332710058680816601395586920882835843791948709923e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.95448171270527198765810574858446724358448730774699e-1), tolerance);
+
+    // Relative accuracy decreases near the quantile's root (the value passes through zero), so the tolerance is relaxed here.
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.09096582988749397639311472333288802663134525410086e-2), tolerance * 10);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.74557416577759248536854968412795127813716796439183e-2), tolerance * 10);
+
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.60865269786540938863996714417993425933643390360701e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.90647080532071673100911236876323489196027799942628e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.28310640852292004320545764804480585932279888826464e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.75630143945078214396279308922575172688187402052064e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.53125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.34751649957416497391423738312790099231331963193670e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.08338732735341567163440035550389980881822036530441e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.59375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.09977916217503813825408682237719484284367467701518e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31348919222343858173602105619413807214737739605647e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.65625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55537778016228176635489075791437025790508273018206e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.83358201486120130332744845371832465533289389791770e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.71875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15968268411007811908706240304578653239207877720217e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.55081568282045925871949387822806899205587760602861e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.78125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.03356573722052416577951184486453626014326770622663e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.65170109428623528831428068089551728119930570543517e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.84375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.48307637103506875745328267812359172615004959210209e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.68160868054034088524891526884683024288168516466432e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.90625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.60235856635301171960591513042024596858455255485505e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12983583770683756688253603663598582344177172936943e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.96875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19714287107110229779819680655029619622254635434578e1), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_landau_quantile_lower() {
+    //
+    // Basic sanity checks; tolerance is 3 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    landau_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -3)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.77109518013577849065583862782160134594408514905936e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -4)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.16727296241754547290632950718657110865371011883488e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -5)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.37666142124664870427736212608782014348092232942703e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -6)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.53937945687640569563878953898367418895418286968892e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -7)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.67193057806636817209385515556513905369159305215132e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -8)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.78348038398799867332294266481364804262899249535026e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -10)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.96398712546539547900868300204740158406402253850740e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -12)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.10672834330580928387396530276881975373360375562766e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -14)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.22455234225296140347183338705883529117630663916504e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -16)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.32474749499506228416012679106564721359216355271060e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -20)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.48880222197195858123676152921728490355916489072920e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -24)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.62012266244208104039475013076613716084830146896364e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -28)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.72950770317262402197221324973838006547359087175177e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -32)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.82318656228158372073367735499500997206084370651970e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -40)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.97778747939490003780916437343292886429806927997697e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -48)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.10253941103391799610961378724535481625827036745191e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -56)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.20706965504048009454149883353144814083641366738928e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -64)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.29700011190686230364493911161520662289614047608888e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -80)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.44616534249558131319374859709618689440842790052762e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -96)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.56714397893479094507321490741760621098238204564655e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -112)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.66888826418737343076580628480419340009764391741662e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -128)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.75666995985336007747791649448887717847452667971341e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -160)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.90272386559500090814947503869007637264448398505091e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -192)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.02154690255519314578158695811138813197102888432118e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -224)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.12170243205222902851790251956181678240666087312573e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -256)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.20826069989721596260510558511263030379850705902299e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -320)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.35254674338923723387138280464388000015841039600555e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -384)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.47014796872616976009165833857464389146621180677518e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -448)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.56940486432189047495132801898571896454756873328884e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -512)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.65527239540648657446629479052874024150007745617425e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -640)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.79856256497052731627737920479094219620428520302497e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -768)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.91547732272180866354170476973276286391282782030491e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -896)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.01423042502756027207185677104980034013809957436307e0), tolerance);
+
+    // The test is terminated because p = 0 after this in fp64.
+    if (N <= 53) {
+        return;
+    }
+
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -1024)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.09971143249822249471944441552701750744803726485204e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -1280)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.24244558247619917128857214117595378487244291900164e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -1536)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.35897805692044314674029465486888992071638929853610e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -1792)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.45745119510512195777968048015384189694395480185084e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -2048)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.54271778755494231572464179212263712867955955556373e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -2560)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.68514415447058604503373799640504720298538164608415e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -3072)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.80146552760710346457341432134646648154958472048223e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -3584)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.89978436824284434538360953970051552537044157804319e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -4096)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.98493298246627952401490656857159297529934857258768e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -5120)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.12719032649420608124001447392893816734788637468278e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -6144)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.24339602515596353391422898038238569270302761832424e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -7168)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.34163046670299704746360546090336718985578875063595e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -8192)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.42671464308364892089984144203590287404801291341155e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -10240)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.56887983067842563164669431493815471028291272019257e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -12288)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.68502258391108555173104319503169367212234002338911e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -14336)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.78321117119788305968192558554365316820805283962570e0), tolerance);
+}
+
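+// These helpers are presumably instantiated once per floating-point type
+// elsewhere in the test suite (e.g. do_test_landau_quantile_lower<double, 53>());
+// N is the bit count BOOST_MATH_BIG_CONSTANT uses when deciding how to
+// materialise the reference constants, and it also gates the low-probability
+// section above that fp64 cannot reach.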
do_test_landau_quantile_upper() { + // + // Basic sanity checks, tolerance is either 3 epsilon + // expressed as a percentage: + // + BOOST_MATH_STD_USING + RealType tolerance = boost::math::tools::epsilon() * 100 * 3; + + std::cout << "Testing acurracy[%]: " << tolerance << std::endl; + + landau_distribution dist(static_cast(0), static_cast(1)); + + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.75630143945078214396279308922575172688187402052064e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.46875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.34751649957416497391423738312790099231331963193670e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.08338732735341567163440035550389980881822036530441e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.40625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.09977916217503813825408682237719484284367467701518e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31348919222343858173602105619413807214737739605647e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.34375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55537778016228176635489075791437025790508273018206e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.83358201486120130332744845371832465533289389791770e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.28125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15968268411007811908706240304578653239207877720217e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.55081568282045925871949387822806899205587760602861e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.21875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.03356573722052416577951184486453626014326770622663e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.65170109428623528831428068089551728119930570543517e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.15625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.48307637103506875745328267812359172615004959210209e0), tolerance); + + BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast(1), -3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.68160868054034088524891526884683024288168516466432e0), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast(1), -4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12983583770683756688253603663598582344177172936943e1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast(1), -5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19714287107110229779819680655029619622254635434578e1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast(1), -6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.28089946846201448479186405990829330152038273940336e1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast(1), -7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.40066013407733304728124190173163568113870838023174e1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast(1), -8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65941656579816517264930679482280545790703168918131e2), tolerance); + 
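+    // NB: the probe probabilities p = 2^-k are built with ldexp, so each one is exactly
+    // representable in binary floating point (e.g. 2^-16 is exactly 1/65536); the
+    // reference quantiles are presumably tabulated at much higher precision than any
+    // RealType under test.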
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.55753077855274500558851746639632584040672341383365e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.61233279175261252785344820794971528959731572291875e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04359994051668217545134727183452999841836322543240e4), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.17280170760481128545941744972976378785527707571987e4), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.67552483213163052181906697701099131817014739251016e5), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06807174647511043368265795578221114430723661526053e7), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70891330693062153503879914631960898643362032945242e8), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.73426111586976542228936210986127280848086481329720e9), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.99970842207360156603062974124919047698394307869345e11), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.79192535600728707050158319385235151176991681631485e14), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.58732891137812933185516807516215074813520642490265e16), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.17435620131280049336696928005840807489266101470318e19), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.69626080092356929518666088104130567689669972090860e23), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.04382147849327037329330237397632338076523897478536e28), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.30551884414534967184149864106941373195910184715021e33), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.16630482969909636093804454941121895928783047320959e38), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -160))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.30420839672446839098151322191224148323438073311282e47), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -192))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.99612707791001852622513406287046677525461975485236e57), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -224))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.71632351102835736008910691332442626839731739200064e67), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.37155334922269019018360983857591746072979653277477e76), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_landau_locscale_param() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    landau_distribution<RealType> dist_0_1(static_cast<RealType>(0), static_cast<RealType>(1));
+    landau_distribution<RealType> dist_1_3(static_cast<RealType>(1), static_cast<RealType>(3));
+    landau_distribution<RealType> dist_0_pihalf(static_cast<RealType>(0), boost::math::constants::pi<RealType>() / 2);
+
+    BOOST_CHECK_CLOSE(entropy(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.3726364400044818244844049010588577710), tolerance);
+    BOOST_CHECK_CLOSE(entropy(dist_1_3), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.3726364400044818244844049010588577710) + log(static_cast<RealType>(3)), tolerance);
+    BOOST_CHECK_CLOSE(entropy(dist_0_pihalf), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.8242191452939366892106001309537399145), tolerance);
+
+    BOOST_CHECK_CLOSE(median(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.57563014394507821439627930892257517269), tolerance);
+    BOOST_CHECK_CLOSE(
+        median(dist_1_3),
+        (1 + 3 * (BOOST_MATH_BIG_CONSTANT(RealType, N, 0.57563014394507821439627930892257517269) + 2 / boost::math::constants::pi<RealType>() * log(static_cast<RealType>(3)))),
+        tolerance
+    );
+    BOOST_CHECK_CLOSE(median(dist_0_pihalf), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.3557804209908013250320928093906509105), tolerance);
+
+    BOOST_CHECK_CLOSE(mode(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, -0.42931452986133525016556463510885028346), tolerance);
+    BOOST_CHECK_CLOSE(
+        mode(dist_1_3),
+        (1 + 3 * (BOOST_MATH_BIG_CONSTANT(RealType, N, -0.42931452986133525016556463510885028346) + 2 / boost::math::constants::pi<RealType>() * log(static_cast<RealType>(3)))),
+        tolerance
+    );
+    BOOST_CHECK_CLOSE(mode(dist_0_pihalf), BOOST_MATH_BIG_CONSTANT(RealType, N, -0.222782981256408504061824283124805665631673572953417648794046), tolerance);
+
+    BOOST_CHECK_CLOSE(pdf(dist_0_1, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.16353124086802260347581305180210465276342), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist_1_3, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.09034495984298569669047391552324642172044), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist_0_pihalf, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.14520663709640194253543670745173917702186), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.57786675964195234763672948547568695034661), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.17119311431882309272302517476020685228892), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_0_pihalf, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.45101809281952585982591839302215356640746), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_0_pihalf, quantile(dist_0_pihalf, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_0_pihalf, quantile(dist_0_pihalf, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+}
+
+BOOST_AUTO_TEST_CASE(landau_pdf_fp64)
+{
+    do_test_landau_pdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_pdf_fp64_std)
+{
+    do_test_landau_pdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_pdf_fp128)
+{
+    do_test_landau_pdf<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(landau_cdf_fp64)
+{
+    do_test_landau_cdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_cdf_fp64_std)
+{
+    do_test_landau_cdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_cdf_fp128)
+{
+    do_test_landau_cdf<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(landau_ccdf_fp64)
+{
+    do_test_landau_ccdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_ccdf_fp64_std)
+{
+    do_test_landau_ccdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_ccdf_fp128)
+{
+    do_test_landau_ccdf<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(landau_quantile_nearzero_fp64)
+{
+    do_test_landau_quantile_nearzero<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_quantile_nearzero_fp64_std)
+{
+    do_test_landau_quantile_nearzero<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_quantile_nearzero_fp128)
+{
+    do_test_landau_quantile_nearzero<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(landau_quantile_lower_fp64)
+{
+    do_test_landau_quantile_lower<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_quantile_lower_fp64_std)
+{
+    do_test_landau_quantile_lower<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_quantile_lower_fp128)
+{
+    do_test_landau_quantile_lower<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(landau_quantile_upper_fp64)
+{
+    do_test_landau_quantile_upper<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_quantile_upper_fp64_std)
+{
+    do_test_landau_quantile_upper<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_quantile_upper_fp128)
+{
+    do_test_landau_quantile_upper<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(landau_locscale_fp64)
+{
+    do_test_landau_locscale_param<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(landau_locscale_fp64_std)
+{
+    do_test_landau_locscale_param<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(landau_locscale_fp128)
+{
+    do_test_landau_locscale_param<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
diff --git a/test/test_landau_cdf_double.cu b/test/test_landau_cdf_double.cu
new file mode 100644
index 0000000000..40bff707d8
--- /dev/null
+++ b/test/test_landau_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/landau.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::landau_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the landau cdf CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch landau cdf kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(cdf(boost::math::landau_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_landau_cdf_float.cu b/test/test_landau_cdf_float.cu
new file mode 100644
index 0000000000..c4513c0844
--- /dev/null
+++ b/test/test_landau_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::landau_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::landau_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_landau_cdf_nvrtc_double.cpp b/test/test_landau_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..f23a758e12 --- /dev/null +++ b/test/test_landau_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_landau_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::landau_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_landau_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_landau_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_landau_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
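+            // Note: the kernel's second input parameter is unnamed and unused; h_in2/d_in2
+            // presumably exist only so all of these NVRTC tests can share one launch harness.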
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::landau_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_landau_cdf_nvrtc_float.cpp b/test/test_landau_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..1a339724e8 --- /dev/null +++ b/test/test_landau_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_landau_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::landau_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_landau_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_landau_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_landau_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] 
= static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::landau_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_landau_pdf_double.cu b/test/test_landau_pdf_double.cu new file mode 100644 index 0000000000..6ce3f5f784 --- /dev/null +++ b/test/test_landau_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::landau_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::landau_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_landau_pdf_float.cu b/test/test_landau_pdf_float.cu new file mode 100644 index 0000000000..5818ddf8a5 --- /dev/null +++ b/test/test_landau_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::landau_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::landau_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_landau_pdf_nvrtc_double.cpp b/test/test_landau_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..1e8df7d07e --- /dev/null +++ b/test/test_landau_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_landau_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::landau_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_landau_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_landau_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_landau_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
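+            // The fixed seed (42) keeps the host/device comparison below deterministic
+            // from run to run.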
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::landau_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_landau_pdf_nvrtc_float.cpp b/test/test_landau_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..1e8df7d07e --- /dev/null +++ b/test/test_landau_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <cstdlib>
+#include <boost/math/distributions/landau.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+
+// This is the float variant of the test, so float_type is float (the double
+// variant above is otherwise identical).
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/landau.hpp>
+extern "C" __global__
+void test_landau_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::landau_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_landau_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_landau_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_landau_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::landau_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_landau_quan_double.cu b/test/test_landau_quan_double.cu new file mode 100644 index 0000000000..4995bd49cf --- /dev/null +++ b/test/test_landau_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::landau_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::landau_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_landau_quan_float.cu b/test/test_landau_quan_float.cu new file mode 100644 index 0000000000..4995bd49cf --- /dev/null +++ b/test/test_landau_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/landau.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+// This is the float variant of the test, so float_type is float (the double
+// variant above is otherwise identical).
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::landau_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the landau quantile CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch landau quantile kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(quantile(boost::math::landau_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_landau_quan_nvrtc_double.cpp b/test/test_landau_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..f4a5b95534
--- /dev/null
+++ b/test/test_landau_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_landau_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::landau_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_landau_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_landau_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_landau_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
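+            // Inputs land in [0, 1); quantile() overflows at the endpoints, which the
+            // ignore_error overflow policy above tolerates, and the verification loop
+            // below skips non-finite reference values.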
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::landau_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_landau_quan_nvrtc_float.cpp b/test/test_landau_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..4a10b497cf
--- /dev/null
+++ b/test/test_landau_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/landau.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/landau.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_landau_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::landau_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_landau_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_landau_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_landau_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
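+            // uniform draws in [0,1) are the probability arguments fed to quantile() below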
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::landau_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_laplace.cpp b/test/test_laplace.cpp
index df79c8a584..965c4c6575 100644
--- a/test/test_laplace.cpp
+++ b/test/test_laplace.cpp
@@ -1,7 +1,7 @@
 // Copyright Thijs van den Berg, 2008.
 // Copyright John Maddock 2008.
 // Copyright Paul A. Bristow 2008, 2009, 2014.
-
+// Copyright Matt Borland 2024.
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
@@ -62,7 +62,7 @@ Test 8: test_extreme_function_arguments()
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/distributions/laplace.hpp>
 #include <boost/math/concepts/real_concept.hpp>
 #include "test_out_of_range.hpp"
diff --git a/test/test_laplace_cdf_double.cu b/test/test_laplace_cdf_double.cu
new file mode 100644
index 0000000000..ec3c83ecde
--- /dev/null
+++ b/test/test_laplace_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::laplace_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_laplace_cdf_float.cu b/test/test_laplace_cdf_float.cu
new file mode 100644
index 0000000000..96acea2fda
--- /dev/null
+++ b/test/test_laplace_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::laplace_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_laplace_cdf_nvrtc_double.cpp b/test/test_laplace_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..9d35b5862a
--- /dev/null
+++ b/test/test_laplace_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_laplace_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_laplace_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_laplace_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_laplace_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
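+            // the fixed seed (42) makes the host/device comparison reproducible run to run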
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::laplace_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_laplace_cdf_nvrtc_float.cpp b/test/test_laplace_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..aacf59cb07
--- /dev/null
+++ b/test/test_laplace_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_laplace_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_laplace_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_laplace_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_laplace_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
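+            // the static_cast is a no-op here (float_type is float) but mirrors the double variant of this test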
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::laplace_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_laplace_pdf_double.cu b/test/test_laplace_pdf_double.cu
new file mode 100644
index 0000000000..568be622b5
--- /dev/null
+++ b/test/test_laplace_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::laplace_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_laplace_pdf_float.cu b/test/test_laplace_pdf_float.cu
new file mode 100644
index 0000000000..cb2aa67c11
--- /dev/null
+++ b/test/test_laplace_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::laplace_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_laplace_pdf_nvrtc_double.cpp b/test/test_laplace_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..49c3864159
--- /dev/null
+++ b/test/test_laplace_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_laplace_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_laplace_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_laplace_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_laplace_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
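+            // inputs in [0,1) evaluate the Laplace PDF close to its mode at x == 0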
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::laplace_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_laplace_pdf_nvrtc_float.cpp b/test/test_laplace_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..2100e2cdcd
--- /dev/null
+++ b/test/test_laplace_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_laplace_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_laplace_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_laplace_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_laplace_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
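+            // 5000 elements keep the NVRTC smoke test quick; the plain CUDA variants use 50000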
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::laplace_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_laplace_quan_double.cu b/test/test_laplace_quan_double.cu
new file mode 100644
index 0000000000..ec3c83ecde
--- /dev/null
+++ b/test/test_laplace_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(0, 1);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::laplace_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_laplace_quan_float.cu b/test/test_laplace_quan_float.cu
new file mode 100644
index 0000000000..96acea2fda
--- /dev/null
+++ b/test/test_laplace_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(0, 1);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::laplace_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_laplace_quan_nvrtc_double.cpp b/test/test_laplace_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..cf02db5a53
--- /dev/null
+++ b/test/test_laplace_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_laplace_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_laplace_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_laplace_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_laplace_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
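+            // p == 0 maps to -inf under quantile(); the verification loop below skips
+            // non-finite results via boost::math::isfinite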
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::laplace_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_laplace_quan_nvrtc_float.cpp b/test/test_laplace_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..36472aaa25
--- /dev/null
+++ b/test/test_laplace_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <cstdlib>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/laplace.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_laplace_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::laplace_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_laplace_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_laplace_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_laplace_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::laplace_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lgamma_double.cu b/test/test_lgamma_double.cu
new file mode 100644
index 0000000000..776ff5d271
--- /dev/null
+++ b/test/test_lgamma_double.cu
@@ -0,0 +1,102 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/lgamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::lgamma(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::lgamma(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_lgamma_float.cu b/test/test_lgamma_float.cu
new file mode 100644
index 0000000000..101037ab30
--- /dev/null
+++ b/test/test_lgamma_float.cu
@@ -0,0 +1,102 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/lgamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::lgamma(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::lgamma(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_log1p_double.cu b/test/test_log1p_double.cu
new file mode 100644
index 0000000000..d164b5a98d
--- /dev/null
+++ b/test/test_log1p_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/log1p.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::log1p(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::log1p(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_log1p_float.cu b/test/test_log1p_float.cu
new file mode 100644
index 0000000000..d164b5a98d
--- /dev/null
+++ b/test/test_log1p_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/log1p.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::log1p(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::log1p(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_log1p_nvrtc_double.cpp b/test/test_log1p_nvrtc_double.cpp
new file mode 100644
index 0000000000..36b0771b1b
--- /dev/null
+++ b/test/test_log1p_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+#include <boost/math/special_functions/log1p.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <cmath>
+#include <cstdlib>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/log1p.hpp>
+extern "C" __global__
+void test_log1p_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::log1p(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_log1p_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_log1p_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_log1p_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::log1p(h_in1[i]);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_log1p_nvrtc_float.cpp b/test/test_log1p_nvrtc_float.cpp
new file mode 100644
index 0000000000..7194ffb56a
--- /dev/null
+++ b/test/test_log1p_nvrtc_float.cpp
@@ -0,0 +1,188 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/log1p.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <cmath>
+#include <cstdlib>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/log1p.hpp>
+extern "C" __global__
+void test_log1p_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::log1p(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_log1p_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_log1p_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_log1p_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::log1p(h_in1[i]);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_log1p_simple.cpp b/test/test_log1p_simple.cpp
new file mode 100644
index 0000000000..ef6c204d4c
--- /dev/null
+++ b/test/test_log1p_simple.cpp
@@ -0,0 +1,48 @@
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/log1p.hpp>
+#include <cmath>
+#include <random>
+#include "math_unit_test.hpp"
+
+constexpr int N = 50000;
+
+template <typename T>
+void test()
+{
+    std::mt19937_64 rng(42);
+    std::uniform_real_distribution<T> dist(0, 0.01);
+
+    for (int n = 0; n < N; ++n)
+    {
+        const T value (dist(rng));
+        CHECK_ULP_CLOSE(std::log1p(value), boost::math::log1p(value), 10);
+    }
+}
+
+template <typename T>
+void test_log1pmx()
+{
+    std::mt19937_64 rng(42);
+    std::uniform_real_distribution<T> dist(0, 0.01);
+
+    for (int n = 0; n < N; ++n)
+    {
+        const T value (dist(rng));
+        CHECK_ULP_CLOSE(std::log1p(value) - value, boost::math::log1pmx(value), 1e9);
+    }
+}
+
+int main()
+{
+    test<float>();
+    test<double>();
+
+    test_log1pmx<float>();
+    test_log1pmx<double>();
+
+    return boost::math::test::report_errors();
+}
diff --git a/test/test_logistic_cdf_double.cu b/test/test_logistic_cdf_double.cu
new file mode 100644
index 0000000000..6b4e850259
--- /dev/null
+++ b/test/test_logistic_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <exception>
+#include <vector>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::logistic_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_logistic_cdf_float.cu b/test/test_logistic_cdf_float.cu
new file mode 100644
index 0000000000..75b6ab0afd
--- /dev/null
+++ b/test/test_logistic_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <exception>
+#include <vector>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::logistic_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_logistic_cdf_nvrtc_double.cpp b/test/test_logistic_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..5548dc412d
--- /dev/null
+++ b/test/test_logistic_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <cstdlib>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_logistic_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_logistic_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_logistic_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_logistic_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::logistic_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_logistic_cdf_nvrtc_float.cpp b/test/test_logistic_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..ea65cc97cb
--- /dev/null
+++ b/test/test_logistic_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <cstdlib>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_logistic_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_logistic_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_logistic_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_logistic_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::logistic_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_logistic_dist.cpp b/test/test_logistic_dist.cpp
index 3bb092ce7e..c5114adbd4 100644
--- a/test/test_logistic_dist.cpp
+++ b/test/test_logistic_dist.cpp
@@ -1,6 +1,6 @@
 // Copyright 2008 Gautam Sewani
 // Copyright 2013 Paul A. Bristow
-
+// Copyright 2024 Matt Borland
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -12,11 +12,14 @@
 #endif
 
 #include <pch.hpp>
-#ifndef BOOST_NO_EXCEPTIONS
-#define BOOST_MATH_UNDERFLOW_ERROR_POLICY throw_on_error
-#define BOOST_MATH_OVERFLOW_ERROR_POLICY throw_on_error
+#include <boost/math/tools/config.hpp>
+
+#if !defined(BOOST_NO_EXCEPTIONS) && !defined(BOOST_MATH_NO_EXCEPTIONS)
+# define BOOST_MATH_UNDERFLOW_ERROR_POLICY throw_on_error
+# define BOOST_MATH_OVERFLOW_ERROR_POLICY throw_on_error
 #endif
-#include <boost/math/tools/test.hpp>
+
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 #include <boost/math/distributions/logistic.hpp>
 using boost::math::logistic_distribution;
diff --git a/test/test_logistic_pdf_double.cu b/test/test_logistic_pdf_double.cu
new file mode 100644
index 0000000000..90232a2d6a
--- /dev/null
+++ b/test/test_logistic_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <exception>
+#include <vector>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::logistic_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_logistic_pdf_float.cu b/test/test_logistic_pdf_float.cu
new file mode 100644
index 0000000000..0a99ff9cf1
--- /dev/null
+++ b/test/test_logistic_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <exception>
+#include <vector>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist(-10000, 10000);
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::logistic_distribution<float_type>(), input_vector1[i]));
+        double t = w.elapsed();
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_logistic_pdf_nvrtc_double.cpp b/test/test_logistic_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..d287225cf6
--- /dev/null
+++ b/test/test_logistic_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_logistic_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::logistic_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_logistic_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_logistic_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_logistic_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); 
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::logistic_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_logistic_pdf_nvrtc_float.cpp b/test/test_logistic_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..9339a6db36
--- /dev/null
+++ b/test/test_logistic_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/logistic.hpp>
+extern "C" __global__
+void test_logistic_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_logistic_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_logistic_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_logistic_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
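+            // Seeded identically (42) to the double variant, so both tests
+            // consume the same underlying random sequence.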
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::logistic_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_logistic_quan_double.cu b/test/test_logistic_quan_double.cu
new file mode 100644
index 0000000000..afe8a4c8cd
--- /dev/null
+++ b/test/test_logistic_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::logistic_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_logistic_quan_float.cu b/test/test_logistic_quan_float.cu
new file mode 100644
index 0000000000..92c371062f
--- /dev/null
+++ b/test/test_logistic_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::logistic_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_logistic_quan_nvrtc_double.cpp b/test/test_logistic_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..f763db82cc
--- /dev/null
+++ b/test/test_logistic_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/logistic.hpp>
+extern "C" __global__
+void test_logistic_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_logistic_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_logistic_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_logistic_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
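+            // Draws fall in [0, 1), the domain of quantile(); a draw of
+            // exactly 0 maps to an infinite quantile, which the verification
+            // loop below skips via boost::math::isfinite.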
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::logistic_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_logistic_quan_nvrtc_float.cpp b/test/test_logistic_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..b14c3c5daa
--- /dev/null
+++ b/test/test_logistic_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/logistic.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/logistic.hpp>
+extern "C" __global__
+void test_logistic_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::logistic_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_logistic_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_logistic_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_logistic_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::logistic_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal.cpp b/test/test_lognormal.cpp
index 759944b396..6fa4a6aa26 100644
--- a/test/test_lognormal.cpp
+++ b/test/test_lognormal.cpp
@@ -8,14 +8,19 @@
 // test_lognormal.cpp
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 
 #include <boost/math/distributions/lognormal.hpp>
    using boost::math::lognormal_distribution;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include "test_out_of_range.hpp"
 
 #include <iostream>
diff --git a/test/test_lognormal_cdf_double.cu b/test/test_lognormal_cdf_double.cu
new file mode 100644
index 0000000000..288240a43c
--- /dev/null
+++ b/test/test_lognormal_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch lognormal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::lognormal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_lognormal_cdf_float.cu b/test/test_lognormal_cdf_float.cu
new file mode 100644
index 0000000000..d9411a1b4a
--- /dev/null
+++ b/test/test_lognormal_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch lognormal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::lognormal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_lognormal_cdf_nvrtc_double.cpp b/test/test_lognormal_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..0ab15206c9
--- /dev/null
+++ b/test/test_lognormal_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/lognormal.hpp>
+extern "C" __global__
+void test_lognormal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_lognormal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_lognormal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_lognormal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
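+            // The lognormal distribution is supported on [0, inf), so every
+            // draw in [0, 1) is a valid argument to cdf().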
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::lognormal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal_cdf_nvrtc_float.cpp b/test/test_lognormal_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..308e8c85e3
--- /dev/null
+++ b/test/test_lognormal_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/lognormal.hpp>
+extern "C" __global__
+void test_lognormal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_lognormal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_lognormal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_lognormal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::lognormal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal_pdf_double.cu b/test/test_lognormal_pdf_double.cu
new file mode 100644
index 0000000000..67bb63a2cd
--- /dev/null
+++ b/test/test_lognormal_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch lognormal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::lognormal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_lognormal_pdf_float.cu b/test/test_lognormal_pdf_float.cu
new file mode 100644
index 0000000000..ac8382dfd1
--- /dev/null
+++ b/test/test_lognormal_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch lognormal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::lognormal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_lognormal_pdf_nvrtc_double.cpp b/test/test_lognormal_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..1799559330
--- /dev/null
+++ b/test/test_lognormal_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <cstdlib>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/lognormal.hpp>
+extern "C" __global__
+void test_lognormal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_lognormal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_lognormal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_lognormal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
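+            // pdf() of the lognormal is zero at x == 0, so even a draw of
+            // exactly 0 yields a finite result for the comparison below.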
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::lognormal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal_pdf_nvrtc_float.cpp b/test/test_lognormal_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..f66c8bf4bb
--- /dev/null
+++ b/test/test_lognormal_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal_pdf_nvrtc_float.cpp b/test/test_lognormal_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..f66c8bf4bb
--- /dev/null
+++ b/test/test_lognormal_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/lognormal.hpp>
+extern "C" __global__
+void test_lognormal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_lognormal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_lognormal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_lognormal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
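+        // Integer ceiling division: with numElements = 5000 and blockSize = 256
+        // this gives (5000 + 255) / 256 = 20 blocks, i.e. 5120 threads in flight;
+        // the i < numElements guard in the kernel masks off the extra 120.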
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify results against the host implementation
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::lognormal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at element: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal_quan_double.cu b/test/test_lognormal_quan_double.cu
new file mode 100644
index 0000000000..056177e006
--- /dev/null
+++ b/test/test_lognormal_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
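+
+// NOTE: cuda_managed_ptr is a local test helper (cuda_managed_ptr.hpp); the
+// working assumption here is that it RAII-wraps cudaMallocManaged(), i.e.
+// unified memory visible to host and device alike -- which is why this test,
+// unlike the NVRTC ones, performs no explicit cudaMemcpy in either direction.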
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vector with probabilities in [0, 1)
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the lognormal quantile CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
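+
+    // Kernel launches are asynchronous: without the cudaDeviceSynchronize()
+    // call above, w.elapsed() would measure only the launch overhead, not the
+    // kernel's actual run time.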
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch lognormal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::lognormal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_lognormal_quan_float.cu b/test/test_lognormal_quan_float.cu
new file mode 100644
index 0000000000..65a9188d77
--- /dev/null
+++ b/test/test_lognormal_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vector with probabilities in [0, 1)
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the lognormal quantile CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch lognormal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::lognormal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_lognormal_quan_nvrtc_double.cpp b/test/test_lognormal_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..ddde58fc0b
--- /dev/null
+++ b/test/test_lognormal_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/lognormal.hpp>
+extern "C" __global__
+void test_lognormal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
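+
+// The kernel string above is compiled at runtime by NVRTC. Boost.Math is
+// header-only, so the device code pulls in the very same headers as the host
+// build; NVRTC locates them through the --include-path option passed to
+// nvrtcCompileProgram() further down.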
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_lognormal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_lognormal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
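+
+        // The PTX produced here is portable intermediate code; the driver
+        // JIT-compiles it for the installed device when cuModuleLoadDataEx()
+        // runs below.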
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_lognormal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
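+        // cuLaunchKernel() takes an array of pointers to the argument values,
+        // in the order of the kernel's parameter list -- the unused second
+        // input still needs its slot so that d_out and numElements line up.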
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify results against the host implementation
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::lognormal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at element: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_lognormal_quan_nvrtc_float.cpp b/test/test_lognormal_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..e121f3b380
--- /dev/null
+++ b/test/test_lognormal_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/lognormal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/lognormal.hpp>
+extern "C" __global__
+void test_lognormal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::lognormal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_lognormal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_lognormal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_lognormal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
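+        // The draws below are uniform on [0, 1), i.e. valid probabilities for
+        // quantile(); the fixed seed (42) keeps any failure reproducible from
+        // run to run.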
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify results against the host implementation
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::lognormal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at element: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_mapairy.cpp b/test/test_mapairy.cpp
new file mode 100644
index 0000000000..ca3b415d76
--- /dev/null
+++ b/test/test_mapairy.cpp
@@ -0,0 +1,957 @@
+// Copyright Takuma Yoshimura 2024.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wliteral-range"
+#elif defined(__GNUC__)
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Woverflow"
+#endif
+
+#define BOOST_TEST_MAIN
+#define BOOST_TEST_MODULE StatsMapAiryTest
+#include <boost/test/included/unit_test.hpp>
+#include <boost/test/tools/floating_point_comparison.hpp>
+#include <boost/math/distributions/mapairy.hpp>
+
+#include <boost/math/tools/big_constant.hpp>
+
+#if __has_include(<stdfloat>)
+#  include <stdfloat>
+#endif
+
+using boost::math::mapairy_distribution;
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+#include <boost/multiprecision/cpp_bin_float.hpp>
+using boost::multiprecision::cpp_bin_float_quad;
+#endif
+
+template <class RealType, int N>
+void do_test_mapairy_pdf(){
+    //
+    // Basic sanity checks; tolerance is 3 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    mapairy_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    // Left tail of MapAiry distribution inherently limits accuracy due to the rapid decay of the function value.
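+    // Concretely, the left tail falls off roughly like exp(-2|x|^3 / 27) (up to
+    // an algebraic prefactor), so by x = -52 the reference value is already of
+    // order 1e-4524 -- hence the widened tolerances on the deepest checks below.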
+ BOOST_CHECK_CLOSE(pdf(dist, static_cast(-52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.48902525259161778456663373601227358266560067430936e-4524), tolerance * 10000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.35000261887613150163386664453709286782071190547944e-3558), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.59514743738055878289124765721120558977841779893312e-2741), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.23001358749520957481536587430803343649600724408224e-2059), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.91092696891623714443869745182216876382044809172723e-1501), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.07670398427978297595825050147335321586451776148320e-1054), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.75337241594382145623417946757631570270349308241803e-869), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.98271669611043473474221732621841457710581601420544e-707), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.16498740545788718692214006589654128484512724342869e-566), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.49681712247552092430360122510975673204884714780707e-445), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.55053727524864779623317373572781282350111182284876e-343), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.19590456107919919197512531538035620765530980528894e-258), tolerance * 1000); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.73694941159984922185033097915349420991674156548250e-188), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.81449463353684665334966063303815524281822160159818e-132), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.74974309038596237884318081144949974855326622002492e-109), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.29136035353058491211769990284207292488489027379633e-89), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.01585226509378718416615473646349537501789989363229e-71), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37033445628502141399361852883285362383866808742814e-56), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34096664947790338152990789779350354526301978444227e-43), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-10)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.68877771535989487461767283800177475346958322976577e-33), tolerance * 100); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-9)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.82006696453724258217996541433935771573414304428384e-24), tolerance * 10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
2.54482240843563125964619012431713490038548382437138e-17), tolerance * 10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95446089249867191439127373027848930673014745742088e-14), tolerance * 10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.50933920988971071711086365057147867083089476835078e-12), tolerance * 10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.93525639583557913222186453754518161964863338679333e-10), tolerance * 10); + + #ifndef BOOST_MATH_HAS_GPU_SUPPORT + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.34369873917936310843493426518284206307542912325702e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.77757355685380561185988935905104213909281485602180e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.67935873875498302175093073530783568485155988744416e-5), tolerance); + #else + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.34369873917936310843493426518284206307542912325702e-8), tolerance*10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.77757355685380561185988935905104213909281485602180e-6), tolerance*10); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.67935873875498302175093073530783568485155988744416e-5), tolerance*10); + #endif + + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.63150314678587224494834533488274597847689803128076e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.66981984951457236403520646020207894651500649740558e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04268591430185285447948956767040247962753669747157e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.09373105986325625209096934627967360395341037677581e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.80640911685196781581446750934825559197579090806069e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.30714423198107251239523977966407204142168461729995e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.58842018103237928265884912263898874481595562824878e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34619157601942089113215976120224270739528013289579e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.75682912797714827638251175725141830254079472660215e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.14483832832989822788477500521594411868042501441052e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.31592413389237158115773602554741285666601187136132e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46510720433737601145589738058516645303764778406120e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
2.58858272589879457467907897007761708777068403816847e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.68368758744508533942159519264649183356982627934537e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.74896964572402100481706929301679979523752439534115e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.78416838013401425639828509948809230256694219667844e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79011912859672711819203848701778087056321687064703e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76859868856746781256050397658493368372419755176282e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.74830251162716136956231178987638601629634664517919e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72213275423897601118140737012980725371274046892426e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.69048759814952957802002736887143054018790117979041e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65378566438926186069214998647211849395648980215839e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.61245973171416933849635378258875498386938909727468e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56695079986621470612303095505427278554320952987612e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51770257100814033767420472605045312962380868746169e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46515640532722451246144916562599607636382596292905e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.40974679016952551097229293394786875981693451661862e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.35189734654776422654774161240658760813142609084491e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.29201738275575132039254286232840244115188891902544e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.23049899232453590038819913858072064402640465533782e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.16771468279209758359211730425922824067563080471965e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.10401551277763824950435892543895916698505857289016e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.03972970763820126963564357339894388732551496850922e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.97516171847191855609649452292217911972760948598791e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.91059168530983397246791731314734572731339598377834e-1), 
tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.84627526286757244484016179988853282946752058004097e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.78244376604908808362842960720569088074360938826161e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.71930459233559687354598878881397561629142415087483e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65704187907786028551352925788813828181432590627815e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.59581735536596915694662484173754591721771767208970e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.53577135041201613422320904399972678346505180911383e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.47702392309337834830220258805531647259773671641587e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41967608032785834866384133800904772318199442922830e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36381105516273814949846883397888219175600292125024e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.30949561875080790965818583863955676147392613436720e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.25678140366749341891056123744605385205177726623077e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.20570621922063209990039201192595153753061823668443e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.15629534246005614282003528019134390685312636509026e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10856277146400733535248998121026984388464547942987e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06251243013238748252181151646220197947016365256337e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.75430585815861237530788049942964394941727146945119e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94921730852144630624722291210866171228958719672719e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.20763203888240939750507494683136041919542710442553e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.52664687715163418854003417614711690443203995990429e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.90290701026305425949257079944431253582616228954748e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.33278922127799799766017618158404889945497518669482e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.81254742548053622180113078981721846479230408163441e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 5.33842514891989443409465171800884519330814876941244e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.51394330031190703409753192401513286012367671189008e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.83173172055121251708819783948372658040389943407125e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.26731989005960717430587605328985374107046652534330e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79973178629260510506067822703511722927370913396772e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.41138621807489585364575132430814179891782893106309e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.08776953835588218788258294328943072921113243548837e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.81702027546943420543987006667270192025143215867522e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.58950538583133457383574346194006716984204842764692e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.23449076237421124835496017947109115185689559096654e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.76628876438498186161275834549825153942245265285021e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.85705896089536983465980519032147649251164182381151e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.41725094843861993121315653435763289609915116903808e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.31264184292047732621870128909412110588268320306673e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.45166594303360764259552410935368398129332569193241e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.77078907451534652348813394683088320326083698637357e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22517551525042172427941302520759668293351400085609e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(9)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.41981613066309674421788083490352250077859372362907e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(10)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.86815272152296425461960094149288322349331776254317e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.47672748088907799783338276475559452864086595323508e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.19065885055336418361425251181752339740309873019493e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.76274498770011155322885494753359606185503116800378e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.12117840379514058437502004817680010914158504614072e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, 
static_cast(15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.84060120900265238260840046704576012273231257957218e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.82527663232857270992129793621400616909426228224712e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.34356222389517260476288187086191333487351565998487e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.33976274743353308671561884595553840397953308913098e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.63275420187782102031440426363742618331512228841827e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.11866107787483058586064754989935066085956111714438e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73477843721978250663679288968512001020601996296490e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.44160680253022056601252575834792105383788377162879e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21335317476130403409545135558158224084771179668233e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03264853379349880038687006045193401398742550912390e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.69348218890827802115199892879428047102258557679491e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.91237960791142062915636488977731277854393252956156e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.65910753386639565315739798326227665091960854776722e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.74840664437921635017204117045344765826413564522111e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.06868586281732253998273705361085730421392207516563e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.54975706173930172767473002839017573406030702226132e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.14583740785288043496760695039190976938266337954109e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.82612140613373038344483207825545571090342007879025e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36036300551703042854867518460643847165588663407845e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04535842750631349697889823754314492592824987833936e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.23732650061248767465314076860143478427557447053122e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.62699739226146233957434830649961751777393343487704e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.42516447612776691413186917108911633335528994759200e-6), tolerance); + 
BOOST_CHECK_CLOSE(pdf(dist, static_cast(112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.50767100225890854218096000852153526257706640350006e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.79354212598752921854694346382153053933709178870365e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22829848834607910587947957072698363475405049514861e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.70691063363569289518344600523741530561019748614806e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(512)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.00884949274594535609095332297684714775260624951825e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1024)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.78341094639115825398956813042728307580960455542744e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.15265496830848341113806943808080759239756631409525e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.57315927453204720744811655603371636021315489208197e-10), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.85204679078224624495444948682547526667095102524654e-11), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.74161227361870459284450362091832192677055853279409e-11), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.07876462219180661771716253718191722284989080940137e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.44253835507444012610805302754717855328014016707006e-13), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 17)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.62113944435292862305033337145279742309373660330666e-14), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70079323596084060715031479698836179832962876226040e-14), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 19)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.00660607636030744539992938919621601865776189138704e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.31497886237763070924617652541844323609885629317601e-16), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 21)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.39564398862596160919395415621974601387332621717152e-17), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.66093089449300961525223017779109302708950139673730e-17), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 23)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.93613874644561300698599726959494859627890174964948e-18), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.19040904529065504857204743126112225011195501281143e-19), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.17543358264254064703206600829360293655988827362640e-20), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
1.62200282665332970268320245960144424095679956397013e-20), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 27)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.86732299457579395219850121616992176031753071031156e-21), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.06875883329165532088522436776488158979648490753308e-22), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 29)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.96038435804935610062036418083267399916819002834560e-23), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.58398713540364228777663367294171284220596445493878e-23), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 31)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.80012011189042378144386404030074611545380892760386e-24), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.94995979813638214930198027960375045149298281432705e-25), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 34)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.54686243691761942165686883762842249372631955082800e-26), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.83394511536756069267771511760113721074800848019252e-28), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 38)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51060784855236271646178597425041551955920802058680e-29), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.72064952672613348894308116953255143520457854922274e-31), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 42)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.47520297710191671529471286547892233784021035466379e-32), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.61000930344348973529597770462163230645079307893137e-34), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 46)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.44062790732609054227999303269426009576929146861433e-35), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.50196221039403294462497822716956279927920276478346e-37), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 50)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40686319074813529519530569599048837477475167906008e-38), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.39644747108792279748533029997027617117109903686087e-40), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 54)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.37388983471497587421416571874071130349096844921335e-41), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.29340573348429960691926787106472282340927640380120e-43), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 58)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34168929171384362716227120970772588231539887618792e-44), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.19277903660576133488209753033664338223562148808726e-46), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 62)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31024344893930041715065547823020105694863171502727e-47), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.09451077793531380359579836946937830296447410946021e-49), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 68)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.99854568157745488632402184518493974898874424751974e-52), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.90482976716548328742580258318841772362182055421850e-55), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 76)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.81331031949754227287676033514493918322443413497900e-58), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.72393585888431862585621126478997967111761145994043e-61), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 84)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.63665611219171740806270631327146452257579244134808e-64), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.55142198456222403131123663405416457282792230600398e-67), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 92)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.46818553179904690557737952544352009065226787695701e-70), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.38689993339750674372790969281593758852760534859083e-73), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 100)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.30751946620850267942178680939056405129648959823324e-76), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22999947871924089787283868104547270634422812327464e-79), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 108)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.15429636593675868932894402445846943978928527663539e-82), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.08036754486011590754779689888522406229422390296425e-85), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 116)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.00817143052745694096464540906760162333420303023853e-88), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.93766741262446966891078653229257971028730764671731e-91), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 124)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.86881583264108366104568997294197237332744887374738e-94), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.80157796156355826273993161420114489582758679076892e-97), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 136)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.67179294735294176362984811229814996321448020054714e-103), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 144)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.54802031264585663187966166715445514985511798910822e-109), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 152)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42998152985177672565427939143605723367225455199072e-115), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 160)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.31741097436120674672534884589772914283013777922699e-121), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 168)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.21005532680626558945212254133007921488775041506480e-127), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 176)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.10767300301195677705013517506606980789923707491379e-133), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 184)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.01003361035533597664845960146529179372714717379932e-139), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 192)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.91691742930921170868726692339448146221842496280605e-145), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 200)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.82811491900368853443838779773185869428484436302762e-151), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 208)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.74342624569290975040282039426027173450931965163004e-157), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 216)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.66266083306590056457788505006816075755054440653805e-163), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 224)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.58563693338956886728085045821014476542524758008771e-169), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 232)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.51218121851879965522847219296469189207577474602480e-175), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 240)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.44212838985328641436431140228718938071801638224106e-181), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 248)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.37532080636337891994887485722273767539788854812723e-187), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast<RealType>(1), 256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31160812984788791651618467066072242297924856960987e-193), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_mapairy_cdf() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    mapairy_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    // Left tail of the MapAiry distribution inherently limits accuracy due to the rapid decay of the function value.
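A note on the tolerances used here: BOOST_CHECK_CLOSE takes its tolerance in percent, so epsilon<RealType>() * 100 * 3 means "three machine epsilons, expressed as a percentage". The relaxed multipliers on the deep left-tail checks below can be read off from the tail behaviour; as a rough sketch (assuming the light-tail asymptotics of an extremal stable law with alpha = 3/2, which is numerically consistent with the reference values that follow):

    F(x) \approx C(x)\, e^{-\frac{2}{27}\lvert x \rvert^{3}} \quad (x \to -\infty),
    \qquad
    \left\lvert \frac{d \ln F}{dx} \right\rvert \approx \frac{2}{9} x^{2},

so a relative perturbation eps of the argument moves ln F by roughly (2/9)|x|^3 * eps. At x = -52 that amplification factor is about 3e4, which matches the tolerance * 10000 on the first check.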
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41267677661910675305382637108866964194971791718950e-4526), tolerance * 10000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.54257458395442655204313046627840914088415076161558e-3561), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.76526114681351440657658256501506814774341295354922e-2743), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.27125192542763584917976723724200097302881304411026e-2062), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.63420353908123838151105660636393196705034472693701e-1504), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.73063520387773809387189485017024200015891995562535e-1057), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.87621737485296751690587747650586494006422187478425e-871), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.15431208733487028595846723973676765010037279135068e-709), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.43691228284359420616981379531226120901046405612148e-568), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.94968725712637201733674598027292284194096042437396e-447), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.29903053626343464859342616262182157628847143496274e-345), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.84047255379287327139957588241933355294847277196332e-260), tolerance * 1000);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.79693358121995588307456046556605766916709047503732e-190), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.18430997009344263598977403572421080974427275299628e-134), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.48855014561480471542019201883561279219078606560987e-111), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21188569601559235190706821985880880180873775432009e-90), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.35130675597097791570481439579882762106711974084282e-73), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.37868088180310978639226395953519655144561270921646e-58), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.96213643222050268066173675729245650285594216946076e-45), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-10)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.54299664164424690506837069440249011982208536500223e-34), tolerance * 100);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-9)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55256887328573175640232009727759270424877561672436e-25), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.76658378173553845515260323095527685644595481193099e-18), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.53962634794926085939153812168733146892427038438756e-15), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.86644224834640735242565183060109673582728544145215e-13), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03383983622833649608546095853620837892086215933298e-10), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.91537779419133030075704775849634109699414218730481e-9), tolerance * 10);
+
+    #ifndef BOOST_MATH_HAS_GPU_SUPPORT
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.98085764775551392183308859729746156105641469279537e-7), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.74005376900861403666100209810033834996576655180465e-6), tolerance);
+    #else
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.98085764775551392183308859729746156105641469279537e-7), tolerance * 10);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.74005376900861403666100209810033834996576655180465e-6), tolerance * 10);
+    #endif
+
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38210242545410097936371084112947842536958832190953e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.20388922732913226923590659641162717163243937441333e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.01190141003747964176020229987886598385662366685178e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.81314867410709227363443913418400543623685087660036e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40335466769957406990026955582239481134043242080576e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65057217408705944017729221733489193253150025104939e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.62249422314997336809404422537950480283670862386341e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.49463093988108215731880519187728405325669137953076e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.13731490830425435301322069056843269169838775278008e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.62598955251978523174755901843430986522046460911826e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90499210557846036762061906872202100639194335579553e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20405659651083834253744598353980626247223640040816e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.52019615753011340929652761968369152398962550184606e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.85001847038770817549042011993996310385733185736928e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.18987376977895959785022298515900161162070947963445e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.53600587820316569763364664186012515534206385253539e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.88469588484092929432583134493879046529813482615104e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.23238998449671083670041452413316011919930693856385e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.40482473418234444080637631800355214131813723348029e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.57580541347059341976751497432836530189703719631253e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.74497724348320019407539810613151808302214091800752e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.91201100390145330029548239186193815674350921221373e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.07660411254690680532566892253363954450731338298810e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.23848132271235424597841060643701240046967758423031e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.39739506217816326324441201522899717272689507530078e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.55312544188864070398568309403385234827691551635647e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.70547996523808722092270387778468756135745550977842e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.85429297088468989836573146625524898390641430572229e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.99942484305170440309173928292359597658678248461166e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.14076102348433999265595009037280955266566735857487e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.27821085871238004955876961091597273535850920355153e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.41170631513311055182782737833523982566081113132113e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.54120059278782917662600347641177702821526512639253e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.66666666666666666666666666666666666666666666666667e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.78809578204369927830516637473281842472755486735670e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.90549592781258492544813432997757039278246435622280e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.01889030914802776673544656107973638254340107333208e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.12831583813566709661955692651725308671816974125125e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.23382165835700226601867127791408227947098667962247e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.33546771684027578538449554398877013955180783305967e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.43332339433579641844979454704458465657719551526672e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.52746620257793576893322261719616438461321552900402e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.61798055507971738758762860873629690865112979477773e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.70495661608502783177600997939957544744070827966369e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.78848923058628458126731690124829998626066330325893e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.86867693680400430433267457602884639416039914905443e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.94562106121611865967124973894234486976489176337189e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.01942489511196041360944230423166502583801465481058e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.09019295071828335416784977458008517243666113680032e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.15803029418984060112492565010063896412665755747313e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.28533238809939722865056318522346970220432633092471e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.40216194581500702945839843944471675879234503695234e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.50932751104023843947651194694738668613497033313653e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.60760530927220029947644215642926175488027901722942e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.69773224168421631436187338078646751800825779589535e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.78040142637134263427912246171411479166776409276684e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.85625978983111842670865940159150361843054620140300e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.92590726602475875901684499549667744162908005254207e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.04873777288161758991649770533103505875566036067315e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.15278911856248478088063341213669352039898456875983e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.24130503839415290663064766600863877055306793445333e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.31696102735179394495180164200593784488582138327694e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.38195112746199551037885830108115610693678956104594e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.43806900885896605694390358343940460845582243989715e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.48677954836791634391273767979450406335892640585045e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.52927980046477119746205391036666872656611789316554e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.59939373386293535847669182576818465955927373035467e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.65433945362867879413538593557411941750837354200812e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.69816672594606569822299986511094581346810669802925e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.73368884642273321841166642302379087929797275815602e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.76289558464640301369422872101450331194067414657857e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.78721979671508698647720200666528542996105699216801e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.80771139145909061814002155447578879059651577452407e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.82515243555548699954994382507757214540918643206410e-1), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_mapairy_ccdf() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    mapairy_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.37401044748021476825244098156569013477953539088174e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.09500789442153963237938093127797899360805664420447e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.79594340348916165746255401646019373752776359959184e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.47980384246988659070347238031630847601037449815394e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.14998152961229182450957988006003689614266814263072e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.81012623022104040214977701484099838837929052036555e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.46399412179683430236635335813987484465793614746461e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.11530411515907070567416865506120953470186517384896e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.76761001550328916329958547586683988080069306143615e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.59517526581765555919362368199644785868186276651971e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.42419458652940658023248502567163469810296280368747e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.25502275651679980592460189386848191697785908199248e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.08798899609854669970451760813806184325649078778627e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.92339588745309319467433107746636045549268661701190e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.76151867728764575402158939356298759953032241576969e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.60260493782183673675558798477100282727310492469922e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.44687455811135929601431690596614765172308448364353e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.29452003476191277907729612221531243864254449022158e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.14570702911531010163426853374475101609358569427771e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.00057515694829559690826071707640402341321751538834e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.85923897651566000734404990962719044733433264142513e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.72178914128761995044123038908402726464149079644847e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.58829368486688944817217262166476017433918886867887e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.45879940721217082337399652358822297178473487360747e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.33333333333333333333333333333333333333333333333333e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.21190421795630072169483362526718157527244513264330e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.09450407218741507455186567002242960721753564377720e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98110969085197223326455343892026361745659892666792e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.87168416186433290338044307348274691328183025874875e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76617834164299773398132872208591772052901332037754e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.66453228315972421461550445601122986044819216694033e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56667660566420358155020545295541534342280448473328e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.47253379742206423106677738280383561538678447099598e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.38201944492028261241237139126370309134887020522227e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.29504338391497216822399002060042455255929172033631e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.21151076941371541873268309875170001373933669674107e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.13132306319599569566732542397115360583960085094557e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.05437893878388134032875026105765513023510823662811e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.98057510488803958639055769576833497416198534518943e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90980704928171664583215022541991482756333886319968e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.84196970581015939887507434989936103587334244252687e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.71466761190060277134943681477653029779567366907529e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.59783805418499297054160156055528324120765496304766e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.49067248895976156052348805305261331386502966686347e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.39239469072779970052355784357073824511972098277058e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.30226775831578368563812661921353248199174220410465e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21959857362865736572087753828588520833223590723316e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.14374021016888157329134059840849638156945379859700e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.07409273397524124098315500450332255837091994745793e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.51262227118382410083502294668964941244339639326852e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.47210881437515219119366587863306479601015431240172e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.58694961605847093369352333991361229446932065546671e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.83038972648206055048198357994062155114178616723061e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.18048872538004489621141698918843893063210438954059e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.61930991141033943056096416560595391544177560102847e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.13220451632083656087262320205495936641073594149555e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.70720199535228802537946089633331273433882106834460e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(4.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.00606266137064641523308174231815340440726269645335e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.45660546371321205864614064425880582491626457991880e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(5.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.01833274053934301777000134889054186531893301970748e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.66311153577266781588333576976209120702027241843979e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(6.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37104415353596986305771278985496688059325853421426e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.12780203284913013522797993334714570038943007831995e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(7.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.92288608540909381859978445524211209403484225475927e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.74847564444513000450056174922427854590813567935902e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(9))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.46888494885704354461170804278826523218710845325600e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.25613324230091750631240813030902908673461241410450e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08995384276796746727925033729177141093311272472945e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.57298412261448730944183018356841954457807894385431e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.49445667426015051170301411155410826055184233951431e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.60377136717752997401167765152949642869947243569953e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.85823720627621671017207210698248665551570440429385e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.22684103170563193014558918295924551172698239430713e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.22006950358287273393523413555812293504310335665744e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.45787594574479193246071067008198432980227781543963e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.86454025983790677529906306805195677315975322722262e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.39200041143645646258373331865031120883657317932172e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.00844464598329521611240330284790908003180945122524e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.69207105975977872414400493979073211647180301757125e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42749222811426799055113000065152671289607878504149e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20357145727036120652264700679701054983338793783565e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.84678186992189202260619598427428218285343162114238e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.57685005349633837416913019835892196933717853056275e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.36681189477727165566612776659042216546642387956225e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.19958501309529382285703116836639459985687034231775e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06387729083711703707125561075502300925516821745411e-3), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.51956726422504037060971384488455660747488001979504e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.58369730588727742464054792329377206933983130862391e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.79171138168694768852946218859426934990849762414603e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.52989549449674176739274234303186543768254475184699e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.57534022322054445175812334907544905864245720421642e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.83262692804541386527340439387556377914021496108680e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.24132027619974492546489695559384126869565934766815e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.76147662885863808608468523190827912794216909524203e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.36574864341415676513997111497521230889735520043816e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.03484906797876224319052497399923183994442800112441e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.75482620393356950171417803995157642981745916059418e-4), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.73979922776430555363554632966080889507162029820752e-5), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(512))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.44353982639936003430898773389958569528340214901924e-5), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1024))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21747521599099094720054911524728128905331418743558e-5), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.30442492111514986227593224497576120783344940470632e-6), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52184402609265962838062304205455964936542503385355e-6), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.38053115409151479601077504716814204054241984396579e-7), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90230503273504174320645323659877052404533613091567e-7), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.72566394266708055503723315148592640827491725732841e-8), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37788129092113064009710124961268854372530077851514e-8), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 17))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.40707992833487973793766069223107728401530584918012e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.97235161365145877787702193738699569533619914113956e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 19))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05088499104186197709404571098840228587836479911802e-9), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.71543951706432436058369237052221627684485231071286e-10), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 21))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31360623880232751062247585210413319083311528997997e-10), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.64429939633040546807800247387131067206560952703714e-11), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 23))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.64200779850290938904479244625064763530742033320990e-11), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.80537424541300683543633877614223483555123688801843e-12), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.05250974812863673632096512092113144155816177586613e-12), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.25671780676625854430204135462707277286762193086669e-13), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 27))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56563718516079592040149887308711394838177046916383e-13), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.07089725845782318037768094883949095101477347530653e-14), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 29))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.20704648145099490050187930370138656919662848438144e-14), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.13386215730722789754721037105719349775366357126605e-14), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 31))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.00880810181374362562734924119592255004499293746830e-15), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41732769663403487193401296875220001924045479390391e-15), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 34))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77165962079254358991751621103655291754760707304047e-16), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.21457452599067948739689526379757206282312287607911e-17), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 38))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76821815748834935924611907974700181516735308796565e-18), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.46027269686043669905764884968375298647166107661462e-19), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 42))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.32534087107554587382206106210469124710349176992174e-20), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.40667608884443234227757632763086405915307399803018e-21), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 46))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.75834511105554042784697040953858007394668838202264e-22), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.44793138881942553480871301192322509243346488933465e-23), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 50))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.05599142360242819185108912649040313655418331509614e-23), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.31998927950303523981386140811300392069272914785317e-24), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 54))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.64998659937879404976732676014125490086591143489426e-25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.06248324922349256220915845017656862608238929361934e-26), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 58))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.57810406152936570276144806272071078260298661702420e-27), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22263007691170712845181007840088847825373327128025e-28), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 62))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.02828759613963391056476259800111059781716658910032e-29), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.03535949517454238820595324750138824727145823637540e-30), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 68))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.86774921121022248157180194922091913636165349433656e-32), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22933581425159726274559405456576861505650835849009e-33), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 76))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.92083720976812072303999071025901346102579431014076e-35), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.00130814026268862974998548477970853285280360959494e-37), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 84))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.68954396916045098398435231996829458258250563999209e-39), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.32741245181320466247555049995046028528516506248764e-41), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 92))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.14490819559581322851180476561725941957580704101369e-42), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.78891905561845816954969494627696784308719850158390e-44), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 100))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.79518602440384088992139835355776225482374765872484e-46), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.36747816313100139050218492743400352316210571675756e-48), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 108))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.82418462989218967265966394911563050494079018243369e-50), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06627884842065463635307249204931726639699846600526e-51), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 116))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.66606070065727286930167576882705822874531010313323e-53), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.60321984477698885828386838879227848241454703614566e-55), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 124))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.06753100746404509106854435748793512877272974397760e-57), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.35551719916257045479460055857489863870739022496500e-59), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 136))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55163994120179942744008802699582486296567144164185e-62), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 144))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.78818345019970563339865240965777554434978379307091e-66), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 152))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.24849475146412508153967873451605357507271433855203e-70), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 160))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25793328893167116248527312854395839235173689906056e-73), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 168))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.51253244368083779903631134898427341882748266372206e-77), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 176))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34583311613301704078034945043561362764342838469777e-80), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 184))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.28572538118412363471765002547757233311383882982854e-84), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 192))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.02179048140655184257238775751360432889120808063607e-88), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 200))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95844494174964644594052435486171980685820509781154e-91), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 208))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.78135972106847276840948328823662062221241478957896e-95), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 216))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16732415065148260947497150591714370659482782948705e-98), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 224))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.84991247717647121453850465311802662742877888058362e-102), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 232))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.95779413373161917611939612577643219587104218892485e-106), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 240))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.69868020843057108791977444476963676657007865940548e-109), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 248))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.14716847761369894511663682805087101213398110206416e-113), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.01249230410490696902261641309835718069677272999613e-116), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 600))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.71969536983273370677446521953393512020163902133418e-272), tolerance);
+
+    // The test is terminated here because x is inf beyond this point in fp64.
+    if (N <= 53) {
+        return;
+    }
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 10000))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41571262853590848961675783559267306723234880166889e-4516), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_mapairy_quantile_nearzero() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    mapairy_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.92947985498880164288717582971263069265106409267331e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.59822399410385083283727681965013516508738702183587e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.37051518906738394785544825118798831695789651589080e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.18765177572396470161180571018467021827812222940306e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.02990438632621036358966845969387883330987649629695e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.88799753980914934718534117636680696659950540760708e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.75672646260182080486757173789442594487758358719568e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.63281240925531315038207673147576301428806562043208e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.51400400121400801502802449510679455584862826208973e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.39864166928988824635332108295073034590821798923232e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.28542241490194660184907680173328998444992309715534e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.17326074020471664204142312429732771994689840460362e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.06119903663447640834652691309939508582577539512003e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.48344198262277235851026749871350785648904607023049e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.33817641975610807828267883212039917352738594297812e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.16710685455022053317001962780672309444401429230030e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.53125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.96039220307812911673187933977720502663701213461352e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.70691924008504753346480857177819427470334982130948e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.59375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.39365150798529936746993802101957358628812489096556e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.00474815142578902619056852805926625285378475306847e-1), tolerance);
+
+    // Relative accuracy decreases near the root of the quantile (see the note after this function).
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.65625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.20300033010874226241969971846939668616524880409485e-2), tolerance * 4);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08557609474704600193050942980969400222443967430903e-1), tolerance * 4);
+
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.71875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.84775355628419671941908878470333245751721020657513e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.81512108276093787175849069715334396559730936013748e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.78125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.05946829281923976565971908177998486051088750363345e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.69237131791870252926200620656937624841227431099908e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.84375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.29003620591891062966640208281844476036608475704263e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70276979914029738186601698003670175695927342267323e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.90625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.28080064428173146950050714257245479404308412291534e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22141517097499177560650456638736418866609207598601e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.96875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.36798741582712685566660459972731625067678977232527e0), tolerance);
+}
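The tolerance * 4 pair above reflects conditioning rather than an inaccurate implementation: the quantile changes sign between p = 0.65625 and p = 0.6875, so the reference values sit close to zero and a fixed absolute error becomes a much larger relative error. A minimal standalone sketch of the scaling (the error value below is hypothetical, chosen only to illustrate the effect):

    #include <cmath>
    #include <cstdio>

    int main() {
        // A fixed absolute error delta in a value y near zero produces a
        // relative error of delta / |y|, however small delta is in absolute terms.
        const double y     = -5.2030003301087423e-2; // quantile near its sign change (table above)
        const double delta = 1.0e-17;                // hypothetical absolute error
        std::printf("relative error ~ %g\n", delta / std::fabs(y)); // ~1.9e-16, about one double epsilon
        return 0;
    }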
ldexp(static_cast(1), -4)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.59822399410385083283727681965013516508738702183587e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -5)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.92947985498880164288717582971263069265106409267331e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -6)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.20998806047945704828514962310033638603842757237089e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -7)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.45496901537018787335070643452598202244805633093375e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -8)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.67354365380697578246790709817724830651682057476917e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -10)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.05331626220443158014611601879873776289585727270317e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -12)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.37844135506419820279451552495734835192576864639213e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -14)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.66475100891248273292011043832615771689316788780040e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -16)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.92187819510636694694450607724165688786106650410224e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -20)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.37198340489560108355199818136414022326129982959773e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -24)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.76028477470580699278679082649703786389435657070830e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -28)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.10419378761193307962534180728809528372838135595700e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -32)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.41443550638291131009585191506467027832121974024563e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -40)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.96026448032205844753104387192915744062974794241787e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -48)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.43355682497258196703130282301265363831865507562392e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -56)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.85432398223245647091315413807705550499215849013310e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -64)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.23500806363233607692361021471929015767129541901722e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -80)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.90695141954692987290075695228754376250679911499016e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -96)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.49149767947731938416802231743764792842977181401152e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -112)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.00124089585489463242206889945762289007601620222216e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -128)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.04845570631944023525776899386112795192930377369709e1), tolerance); + 
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -160)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.13196240444608571220864964743861585994042496495014e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -192)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.20475312075429969521072327133506584708675011147301e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -224)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.26971371983788322297191645356972895828456120573844e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -256)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.32865827226175697711590794217590458317537880046696e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -320)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.43303723297942256778502134022428247271630018451935e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -384)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.52413050129859916220402440029315858653640179700446e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -448)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.60549473918897820617071548963372947780704796096977e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -512)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.67937186583822375017526293948703697021001956377209e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -640)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.81028650954862232126230250634527453300745376729470e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -768)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.92461760606525027716536281516018610241283237089375e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -896)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.02678879167343824140492421290271637384390204302948e1), tolerance);
+
+    // The test is terminated here because p underflows to 0 beyond this point in fp64.
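+    // (The lower tail decays super-exponentially, roughly like exp(-c*|x|^3),
+    // which is why even p = 2^-896 maps to a quantile of only about -20. In
+    // IEEE double the smallest positive value is 2^-1074, so still smaller
+    // probabilities such as ldexp(1, -1280) evaluate to exactly 0, and the
+    // remaining checks are restricted to wider types.)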
+ if(N <= 53){ + return; + } + + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -1024)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.11959316095291435774375635827672516757012476284109e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -1280)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.28411538998117558460421271505353342631141242515516e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -1536)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.42785430825019465839030492595482468932343681278349e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -1792)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.55634220352411216642587067287258912926964865154135e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -2048)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.67307564006689676593687414536012112442665935238575e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -2560)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.88006705102088142572333576380332410442882971872985e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -3072)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.06095108081567569614908765234696115918183394966145e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -3584)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.22266885559263332213569359568928269416384800428618e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -4096)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.36960987939726803544369406181770745085696438921663e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -5120)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.63019918860979732881015320942243118103538614206846e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -6144)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.85794964449030059786959849735574335608745736630897e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -7168)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.06158611834421484993920289979452825015061756369038e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -8192)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.24662793339079714510108682543625432044226557042768e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -10240)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.57480988422877882295975852850886697641784458384426e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -12288)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.86165509604814747106458415059041118242367113233764e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast(1), -14336)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.11814205550106893579346476384206383922751551334008e1), tolerance); +} + +template +void do_test_mapairy_quantile_upper() { + // + // Basic sanity checks, tolerance is either 3 epsilon + // expressed as a percentage: + // + BOOST_MATH_STD_USING + RealType tolerance = boost::math::tools::epsilon() * 100 * 3; + + std::cout << "Testing acurracy[%]: " << tolerance << std::endl; + + mapairy_distribution dist(static_cast(0), static_cast(1)); + + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.16710685455022053317001962780672309444401429230030e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast(0.46875))), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.96039220307812911673187933977720502663701213461352e-1), tolerance); + 
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.16710685455022053317001962780672309444401429230030e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.46875))), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.96039220307812911673187933977720502663701213461352e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.70691924008504753346480857177819427470334982130948e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.40625))), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.39365150798529936746993802101957358628812489096556e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.00474815142578902619056852805926625285378475306847e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.34375))), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.20300033010874226241969971846939668616524880409485e-2), tolerance * 4);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08557609474704600193050942980969400222443967430903e-1), tolerance * 4);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.28125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.84775355628419671941908878470333245751721020657513e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.81512108276093787175849069715334396559730936013748e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.21875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.05946829281923976565971908177998486051088750363345e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.69237131791870252926200620656937624841227431099908e-1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, static_cast<RealType>(0.15625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.29003620591891062966640208281844476036608475704263e0), tolerance);
+
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70276979914029738186601698003670175695927342267323e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22141517097499177560650456638736418866609207598601e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.36798741582712685566660459972731625067678977232527e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.63240190107315016002742117235901647573374032928275e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.37487073802383509843362175744776155324007021797019e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18429841828919685808654152263914024115105410242857e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.50552947051871870287466561935025391775332522082960e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38732922416208535732704088926085913683321415168727e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.49585416520688234273370911116680490544653394894587e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.80900106401310352569173769396359734191276945763559e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.59336704609415555521333325344685782416363311947039e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.55156669326461321628671444544286440554399508021258e4), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25510428200953290831680861672648808196801287129080e5), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43190196382396984755739962193081183260874381700500e6), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.77306696194440571351895040208958865072901805506645e7), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.32755474809804845725014907927003674268598932505496e9), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.38411271011676880227484089908793278086771148389202e10), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.78343716417988572841183668816998971166780683532365e12), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.14997210570212560225883973162628933329443239976683e15), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.99677152273063632531264438890163357898418349270434e18), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.62497388866235918645459765949830559011448320623360e22), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.64139290653028845793860203742403295803332310962125e25), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -160))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.97920970865877789202766313638336627695568626671453e31), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -192))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.84407885843159775798215826490261804734522750775796e38), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -224))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.87250989448760462117787200600339872607929758715096e44), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28743695332377477386366453958993754420344828061498e51), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -320))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.98819430202303731872899900564216427734599065613620e63), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -384))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.27507518735966284635753747136817320622994832637767e76), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -448))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.38092093738495853701408205401696240265896114717023e89), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -512))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.05852403143769157513875467957386926795647775801563e102), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -640))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.49075014586685014041688089060587827415714205982384e128), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -768))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.26604066065618724776472365999448082037897284653223e153), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -896))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.54152887582709267575372250070675971504362522185818e179), tolerance);
+}
+
+template <typename RealType, int N>
+void do_test_mapairy_locscale_param() {
+    //
+    // Basic sanity checks; tolerance is 3 epsilon,
+    // expressed as a percentage:
+    //
+
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    mapairy_distribution<RealType> dist_0_1(static_cast<RealType>(0), static_cast<RealType>(1));
+    mapairy_distribution<RealType> dist_1_3(static_cast<RealType>(1), static_cast<RealType>(3));
+    mapairy_distribution<RealType> dist_0_invcbrt18(static_cast<RealType>(0), 1 / cbrt(static_cast<RealType>(18)));
+
+    BOOST_CHECK_CLOSE(entropy(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.0072768184106563460003025875575283708), tolerance);
+    BOOST_CHECK_CLOSE(entropy(dist_1_3), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.0072768184106563460003025875575283708) + log(static_cast<RealType>(3)), tolerance);
+    BOOST_CHECK_CLOSE(entropy(dist_0_invcbrt18), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.0438195657786014485977283891231190450), tolerance);
+
+    BOOST_CHECK_CLOSE(median(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, -0.71671068545502205331700196278067230944440), tolerance);
+    BOOST_CHECK_CLOSE(
+        median(dist_1_3),
+        (1 + 3 * BOOST_MATH_BIG_CONSTANT(RealType, N, -0.71671068545502205331700196278067230944440)),
+        tolerance
+    );
+    BOOST_CHECK_CLOSE(median(dist_0_invcbrt18), BOOST_MATH_BIG_CONSTANT(RealType, N, -0.27347630981017495237228835747364595601553), tolerance);
+
+    BOOST_CHECK_CLOSE(mode(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.1615872711359706852500000803029112987), tolerance);
+    BOOST_CHECK_CLOSE(
+        mode(dist_1_3),
+        (1 + 3 * BOOST_MATH_BIG_CONSTANT(RealType, N, -1.1615872711359706852500000803029112987)),
+        tolerance
+    );
+    BOOST_CHECK_CLOSE(mode(dist_0_invcbrt18), BOOST_MATH_BIG_CONSTANT(RealType, N, -0.4432284977460014720866292801600737435), tolerance);
+
+    BOOST_CHECK_EQUAL(mean(dist_0_1), static_cast<RealType>(0));
+    BOOST_CHECK_EQUAL(mean(dist_1_3), static_cast<RealType>(1));
+    BOOST_CHECK_EQUAL(mean(dist_0_invcbrt18), static_cast<RealType>(0));
+
+    BOOST_CHECK((boost::math::isinf)(variance(dist_0_1)));
+
+    BOOST_CHECK_CLOSE(pdf(dist_0_1, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06251243013238748252181151646220197947016365256337e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist_1_3, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.97516171847191855609649452292217911972760948598791e-1) / 3, tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist_0_invcbrt18, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.29264884227495575949271548513687010027396013858107e-2), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.15803029418984060112492565010063896412665755747313e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.66666666666666666666666666666666666666666666666667e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_0_invcbrt18, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.19730152884211135907661924151362266458717453254074e-1), tolerance);
+
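+    // Round-trip sanity checks: quantile() should invert cdf(), so
+    // cdf(dist, quantile(dist, p)) is expected to recover p within tolerance.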
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_0_invcbrt18, quantile(dist_0_invcbrt18, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_0_invcbrt18, quantile(dist_0_invcbrt18, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+}
+
+BOOST_AUTO_TEST_CASE(mapairy_pdf_fp64)
+{
+    do_test_mapairy_pdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_pdf_std64)
+{
+    do_test_mapairy_pdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_pdf_fp128)
+{
+    do_test_mapairy_pdf<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(mapairy_cdf_fp64)
+{
+    do_test_mapairy_cdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_cdf_std64)
+{
+    do_test_mapairy_cdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_cdf_fp128)
+{
+    do_test_mapairy_cdf<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(mapairy_ccdf_fp64)
+{
+    do_test_mapairy_ccdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_ccdf_std64)
+{
+    do_test_mapairy_ccdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_ccdf_fp128)
+{
+    do_test_mapairy_ccdf<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(mapairy_quantile_nearzero_fp64)
+{
+    do_test_mapairy_quantile_nearzero<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_quantile_nearzero_std64)
+{
+    do_test_mapairy_quantile_nearzero<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_quantile_nearzero_fp128)
+{
+    do_test_mapairy_quantile_nearzero<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(mapairy_quantile_lower_fp64)
+{
+    do_test_mapairy_quantile_lower<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_quantile_lower_std64)
+{
+    do_test_mapairy_quantile_lower<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_quantile_lower_fp128)
+{
+    do_test_mapairy_quantile_lower<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(mapairy_quantile_upper_fp64)
+{
+    do_test_mapairy_quantile_upper<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_quantile_upper_std64)
+{
+    do_test_mapairy_quantile_upper<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_quantile_upper_fp128)
+{
+    do_test_mapairy_quantile_upper<cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(mapairy_locscale_fp64)
+{
+    do_test_mapairy_locscale_param<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(mapairy_locscale_std64)
+{
+    do_test_mapairy_locscale_param<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(mapairy_locscale_fp128)
+{
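+    // Quad-precision run; compiled out when GPU support is enabled
+    // (see the #ifndef BOOST_MATH_HAS_GPU_SUPPORT guard above).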
do_test_mapairy_locscale_param(); +} +#endif diff --git a/test/test_mapairy_cdf_double.cu b/test/test_mapairy_cdf_double.cu new file mode 100644 index 0000000000..7cb62a9343 --- /dev/null +++ b/test/test_mapairy_cdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::mapairy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::mapairy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_mapairy_cdf_float.cu b/test/test_mapairy_cdf_float.cu new file mode 100644 index 0000000000..b67c0ee933 --- /dev/null +++ b/test/test_mapairy_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. 
+// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::mapairy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::mapairy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_mapairy_cdf_nvrtc_double.cpp b/test/test_mapairy_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..87e7948b72 --- /dev/null +++ b/test/test_mapairy_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_mapairy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::mapairy_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_mapairy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_mapairy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_mapairy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
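+        // Note: the kernel above takes but never reads its second parameter,
+        // so h_in2 only exercises the host-to-device copy path.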
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::mapairy_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_mapairy_cdf_nvrtc_float.cpp b/test/test_mapairy_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..d84404c5e1 --- /dev/null +++ b/test/test_mapairy_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_mapairy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::mapairy_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_mapairy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_mapairy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_mapairy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::mapairy_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_mapairy_pdf_double.cu b/test/test_mapairy_pdf_double.cu new file mode 100644 index 0000000000..4ccd8b2f23 --- /dev/null +++ b/test/test_mapairy_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::mapairy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::mapairy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_mapairy_pdf_float.cu b/test/test_mapairy_pdf_float.cu new file mode 100644 index 0000000000..520ac9a68a --- /dev/null +++ b/test/test_mapairy_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::mapairy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::mapairy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_mapairy_pdf_nvrtc_double.cpp b/test/test_mapairy_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..5f461d6d13 --- /dev/null +++ b/test/test_mapairy_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_mapairy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::mapairy_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_mapairy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_mapairy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_mapairy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::mapairy_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_mapairy_pdf_nvrtc_float.cpp b/test/test_mapairy_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..39eb4152d0 --- /dev/null +++ b/test/test_mapairy_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_mapairy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::mapairy_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_mapairy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_mapairy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_mapairy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::mapairy_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_mapairy_quan_double.cu b/test/test_mapairy_quan_double.cu new file mode 100644 index 0000000000..3787000207 --- /dev/null +++ b/test/test_mapairy_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::mapairy_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::mapairy_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_mapairy_quan_float.cu b/test/test_mapairy_quan_float.cu new file mode 100644 index 0000000000..cd9d120070 --- /dev/null +++ b/test/test_mapairy_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/mapairy.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <iostream>
+#include <vector>
+#include <exception>
+#include <cstdlib>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::mapairy_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the quantile CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::mapairy_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 15000.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_mapairy_quan_nvrtc_double.cpp b/test/test_mapairy_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..43ac17c848
--- /dev/null
+++ b/test/test_mapairy_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/mapairy.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/mapairy.hpp>
+extern "C" __global__
+void test_mapairy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::mapairy_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_mapairy_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_mapairy_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_mapairy_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
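+            // Note: the kernel's second parameter is an unnamed const float_type*,
+            // so h_in2/d_in2 are never read on the device; presumably the unused
+            // buffer is kept so all of these NVRTC tests share one launch harness.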
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::mapairy_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_mapairy_quan_nvrtc_float.cpp b/test/test_mapairy_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..a127843e93
--- /dev/null
+++ b/test/test_mapairy_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_mapairy_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::mapairy_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_mapairy_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_mapairy_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_mapairy_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
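// Draws from (0, 1) are valid probability arguments for quantile(), so the
+            // device results should match the serial host loop below to within the
+            // 300 eps tolerance used in the verification step.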
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::mapairy_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_modf_double.cu b/test/test_modf_double.cu new file mode 100644 index 0000000000..06e65c1063 --- /dev/null +++ b/test/test_modf_double.cu @@ -0,0 +1,105 @@ + +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/modf.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    float_type fract;
+    int i_part;
+    long l_part;
+    long long ll_part;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::modf(in[i], &fract) + boost::math::modf(in[i], &i_part) + boost::math::modf(in[i], &l_part) + boost::math::modf(in[i], &ll_part);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type fract;
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::modf(h_A[i], &fract));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_modf_float.cu b/test/test_modf_float.cu
new file mode 100644
index 0000000000..06e65c1063
--- /dev/null
+++ b/test/test_modf_float.cu
@@ -0,0 +1,105 @@
+
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/modf.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include <cstdlib>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    float_type fract;
+    int i_part;
+    long l_part;
+    long long ll_part;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::modf(in[i], &fract) + boost::math::modf(in[i], &i_part) + boost::math::modf(in[i], &l_part) + boost::math::modf(in[i], &ll_part);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type fract;
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::modf(h_A[i], &fract));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_modf_nvrtc_double.cpp b/test/test_modf_nvrtc_double.cpp
new file mode 100644
index 0000000000..f172dd52c1
--- /dev/null
+++ b/test/test_modf_nvrtc_double.cpp
@@ -0,0 +1,200 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/modf.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/special_functions/modf.hpp>
+extern "C" __global__
+void test_modf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    float_type fract;
+    int i_part;
+    long l_part;
+    long long ll_part;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::modf(in1[i], &fract) +
+                 boost::math::modf(in1[i], &i_part) +
+                 boost::math::modf(in1[i], &l_part) +
+                 boost::math::modf(in1[i], &ll_part);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_modf_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_modf_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_modf_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
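+        // Each boost::math::modf overload returns the fractional part of its input
+        // and stores the integral part through the pointer (float_type, int, long,
+        // or long long). For in1[i] = 3.25 every call yields 0.25, so the kernel's
+        // sum is 1.0 -- the same value the serial check below computes as
+        // 4 * boost::math::modf(h_in1[i], &fract).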
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            float_type fract;
+            const auto res = 4 * boost::math::modf(h_in1[i], &fract);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_modf_nvrtc_float.cpp b/test/test_modf_nvrtc_float.cpp
new file mode 100644
index 0000000000..1dcd3c0810
--- /dev/null
+++ b/test/test_modf_nvrtc_float.cpp
@@ -0,0 +1,200 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_modf_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + float_type fract; + int i_part; + long l_part; + long long ll_part; + + if (i < numElements) + { + out[i] = boost::math::modf(in1[i], &fract) + + boost::math::modf(in1[i], &i_part) + + boost::math::modf(in1[i], &l_part) + + boost::math::modf(in1[i], &ll_part); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_modf_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_modf_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_modf_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + 
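// This float variant mirrors the double test above: draws from (0, 1000)
+        // keep every integral part well inside int/long/long long range, so all
+        // four modf overloads can store it without overflow.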
std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + float_type fract; + const auto res = 4 * boost::math::modf(h_in1[i], &fract); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_beta.cpp b/test/test_nc_beta.cpp
index 3e7c08d0f3..105db62a3e 100644
--- a/test/test_nc_beta.cpp
+++ b/test/test_nc_beta.cpp
@@ -11,6 +11,9 @@
 // This must appear *before* any #includes, and precludes pch usage:
 // #define BOOST_MATH_ASSERT_UNDEFINED_POLICY false
+#ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#endif
 
 #ifdef _MSC_VER
 #pragma warning (disable:4127 4512)
@@ -27,7 +30,12 @@
 # define TEST_REAL_CONCEPT
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #include <boost/math/distributions/chi_squared.hpp> // for chi_squared_distribution
 #include <boost/math/distributions/poisson.hpp> // for poisson_distribution
 #define BOOST_TEST_MAIN
@@ -41,6 +49,7 @@
 #include "test_ncbeta_hooks.hpp"
 #include "table_type.hpp"
 #include "test_nc_beta.hpp"
+#include "../include_private/boost/math/tools/test.hpp"
 #include <iostream>
 
 using std::cout;
diff --git a/test/test_nc_beta.hpp b/test/test_nc_beta.hpp
index 3ba983adca..39b4ede9eb 100644
--- a/test/test_nc_beta.hpp
+++ b/test/test_nc_beta.hpp
@@ -6,7 +6,9 @@
 #ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
 #define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
 #endif
-#include <boost/math/concepts/real_concept.hpp>
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
+#include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp>
 #include <boost/test/results_collector.hpp>
diff --git a/test/test_nc_beta_cdf_double.cu b/test/test_nc_beta_cdf_double.cu
new file mode 100644
index 0000000000..75073ac8d7
--- /dev/null
+++ b/test/test_nc_beta_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <iostream>
+#include <vector>
+#include <exception>
+#include <cstdlib>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CDF CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_beta distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_nc_beta_cdf_float.cu b/test/test_nc_beta_cdf_float.cu
new file mode 100644
index 0000000000..1088678c29
--- /dev/null
+++ b/test/test_nc_beta_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch non_central_beta distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_nc_beta_cdf_nvrtc_double.cpp b/test/test_nc_beta_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..4b9523fb22 --- /dev/null +++ b/test/test_nc_beta_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_non_central_beta_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_non_central_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_nc_beta_cdf_nvrtc_float.cpp b/test/test_nc_beta_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..ec63159b41 --- /dev/null +++ b/test/test_nc_beta_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_non_central_beta_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_beta_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_non_central_beta_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_beta_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_nc_beta_pdf_double.cu b/test/test_nc_beta_pdf_double.cu new file mode 100644 index 0000000000..485cf1d77c --- /dev/null +++ b/test/test_nc_beta_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch non_central_beta distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_nc_beta_pdf_float.cu b/test/test_nc_beta_pdf_float.cu new file mode 100644 index 0000000000..bd989a330a --- /dev/null +++ b/test/test_nc_beta_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch non_central_beta distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::non_central_beta_distribution(0.5, 0.5, 0.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_nc_beta_pdf_nvrtc_double.cpp b/test/test_nc_beta_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..64387cebc6 --- /dev/null +++ b/test/test_nc_beta_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_beta_pdf_nvrtc_double.cpp b/test/test_nc_beta_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..64387cebc6
--- /dev/null
+++ b/test/test_nc_beta_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/non_central_beta.hpp>
+extern "C" __global__
+void test_non_central_beta_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
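+
+        // The NVRTC flow from here: build a program from the kernel source
+        // string, JIT-compile it to PTX, load the PTX as a module through the
+        // CUDA driver API, then look up the kernel entry point by name.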
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_beta_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_beta_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_beta_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
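+        // cuLaunchKernel receives kernel arguments as an array of pointers to
+        // each argument value (not the values themselves), so the device
+        // pointers and numElements are passed by address.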
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_beta_pdf_nvrtc_float.cpp b/test/test_nc_beta_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..de85a7ebc6
--- /dev/null
+++ b/test/test_nc_beta_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/non_central_beta.hpp>
+extern "C" __global__
+void test_non_central_beta_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_beta_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_beta_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_beta_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_beta_quan_double.cu b/test/test_nc_beta_quan_double.cu
new file mode 100644
index 0000000000..eb1a872774
--- /dev/null
+++ b/test/test_nc_beta_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
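+    // Ceiling division: the grid is rounded up so that a numElements that is
+    // not a multiple of threadsPerBlock is still fully covered, e.g. 50000
+    // elements at 256 threads/block -> 196 blocks (50176 threads); the
+    // i < numElements guard in the kernel discards the excess threads.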
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_beta distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_beta_quan_float.cu b/test/test_nc_beta_quan_float.cu
new file mode 100644
index 0000000000..f205e810c7
--- /dev/null
+++ b/test/test_nc_beta_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
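+    // cudaDeviceSynchronize blocks until the kernel has finished, so the time
+    // reported below covers the whole run; launch failures are reported
+    // asynchronously, which is why cudaGetLastError is checked afterwards.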
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_beta distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_beta_quan_nvrtc_double.cpp b/test/test_nc_beta_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..61651d85e3
--- /dev/null
+++ b/test/test_nc_beta_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/non_central_beta.hpp>
+extern "C" __global__
+void test_non_central_beta_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_beta_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_beta_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_beta_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_beta_quan_nvrtc_float.cpp b/test/test_nc_beta_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..3c9a0ce231
--- /dev/null
+++ b/test/test_nc_beta_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/non_central_beta.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/non_central_beta.hpp>
+extern "C" __global__
+void test_non_central_beta_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_beta_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_beta_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_beta_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::non_central_beta_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_chi_squared.cpp b/test/test_nc_chi_squared.cpp
index 14bec61259..0d6f261fa3 100644
--- a/test/test_nc_chi_squared.cpp
+++ b/test/test_nc_chi_squared.cpp
@@ -7,7 +7,9 @@
 // (See accompanying file LICENSE_1_0.txt
 // or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch.hpp>
+#endif
 
 #ifdef _MSC_VER
 #pragma warning (disable:4127 4512)
@@ -24,8 +26,18 @@
 # define TEST_REAL_CONCEPT
 #endif
 
-#include <boost/math/tools/test.hpp>
+#ifndef BOOST_MATH_OVERFLOW_ERROR_POLICY
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#endif
+
+#include <boost/math/tools/config.hpp>
+
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #include <boost/math/distributions/non_central_chi_squared.hpp> // for chi_squared_distribution
 #include <boost/math/distributions/poisson.hpp> // for chi_squared_distribution
 #define BOOST_TEST_MAIN
diff --git a/test/test_nc_chi_squared.hpp b/test/test_nc_chi_squared.hpp
index b2fa6d75be..3ab276a5a0 100644
--- a/test/test_nc_chi_squared.hpp
+++ b/test/test_nc_chi_squared.hpp
@@ -80,7 +80,7 @@ void test_spot(
    boost::math::non_central_chi_squared_distribution<RealType> dist(df, ncp);
    BOOST_CHECK_CLOSE(
       cdf(dist, cs), P, tol);
-#ifndef BOOST_NO_EXCEPTIONS
+#if !defined(BOOST_NO_EXCEPTIONS) && !defined(BOOST_MATH_NO_EXCEPTIONS)
    try{
       BOOST_CHECK_CLOSE(
          pdf(dist, cs), naive_pdf(dist.degrees_of_freedom(), ncp, cs), tol * 150);
@@ -402,7 +402,7 @@ void quantile_sanity_check(T& data, const char* type_name, const char* test)
    // Sanity check mode, the accuracy of
    // the mode is at *best* the square root of the accuracy of the PDF:
    //
-#ifndef BOOST_NO_EXCEPTIONS
+#if !defined(BOOST_NO_EXCEPTIONS) && !defined(BOOST_MATH_NO_EXCEPTIONS)
    try{
       value_type m = mode(boost::math::non_central_chi_squared_distribution<value_type>(data[i][0], data[i][1]));
       value_type p = pdf(boost::math::non_central_chi_squared_distribution<value_type>(data[i][0], data[i][1]), m);
@@ -417,7 +417,7 @@ void quantile_sanity_check(T& data, const char* type_name, const char* test)
    // values to get back to the correct degrees of freedom or
    // non-centrality parameter:
    //
-#ifndef BOOST_NO_EXCEPTIONS
+#if !defined(BOOST_NO_EXCEPTIONS) && !defined(BOOST_MATH_NO_EXCEPTIONS)
    try{
 #endif
    if((data[i][3] < 0.99) && (data[i][3] != 0))
@@ -438,7 +438,7 @@ void quantile_sanity_check(T& data, const char* type_name, const char* test)
          boost::math::non_central_chi_squared_distribution<value_type>::find_non_centrality(boost::math::complement(data[i][0], data[i][2], data[i][4])),
          data[i][1], precision, i);
       }
-#ifndef BOOST_NO_EXCEPTIONS
+#if !defined(BOOST_NO_EXCEPTIONS) && !defined(BOOST_MATH_NO_EXCEPTIONS)
    }
    catch(const std::exception& e)
    {
diff --git a/test/test_nc_chi_squared_cdf_double.cu b/test/test_nc_chi_squared_cdf_double.cu
new file mode 100644
index 0000000000..64a442f6f8
--- /dev/null
+++ b/test/test_nc_chi_squared_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
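+// ignore_error stops the overflow policy from trying to throw: C++ exceptions
+// are not available in device code, so overflowing results come back as
+// infinities rather than via a thrown std::overflow_error.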
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_chi_squared_cdf_float.cu b/test/test_nc_chi_squared_cdf_float.cu
new file mode 100644
index 0000000000..8ac518adc8
--- /dev/null
+++ b/test/test_nc_chi_squared_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_chi_squared_cdf_nvrtc_double.cpp b/test/test_nc_chi_squared_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..05569b02eb
--- /dev/null
+++ b/test/test_nc_chi_squared_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+extern "C" __global__
+void test_non_central_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_chi_squared_kernel");
+
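+        // The two option sets below differ only in that the CI build pins the
+        // virtual architecture (compute_75) and uses the runner's checkout
+        // path; both point NVRTC at a Boost.Math include tree plus the CUDA
+        // headers, and the include paths are machine specific.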
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
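+        // h_in2/d_in2 are filled and copied even though the kernel ignores its
+        // unnamed second parameter; the extra argument keeps a single launch
+        // signature across all of these NVRTC tests.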
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_chi_squared_cdf_nvrtc_float.cpp b/test/test_nc_chi_squared_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..57964384bd
--- /dev/null
+++ b/test/test_nc_chi_squared_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/tools/config.hpp>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+extern "C" __global__
+void test_non_central_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_chi_squared_pdf_double.cu b/test/test_nc_chi_squared_pdf_double.cu
new file mode 100644
index 0000000000..19a96944bd
--- /dev/null
+++ b/test/test_nc_chi_squared_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_chi_squared_pdf_float.cu b/test/test_nc_chi_squared_pdf_float.cu
new file mode 100644
index 0000000000..9433005b26
--- /dev/null
+++ b/test/test_nc_chi_squared_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_chi_squared_pdf_nvrtc_double.cpp b/test/test_nc_chi_squared_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..4ddede8281
--- /dev/null
+++ b/test/test_nc_chi_squared_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+extern "C" __global__
+void test_non_central_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_chi_squared_pdf_nvrtc_float.cpp b/test/test_nc_chi_squared_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..ec749b2a4b
--- /dev/null
+++ b/test/test_nc_chi_squared_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+extern "C" __global__
+void test_non_central_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_chi_squared_quan_double.cu b/test/test_nc_chi_squared_quan_double.cu
new file mode 100644
index 0000000000..a54a2e015c
--- /dev/null
+++ b/test/test_nc_chi_squared_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch non_central_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_chi_squared_quan_float.cu b/test/test_nc_chi_squared_quan_float.cu
new file mode 100644
index 0000000000..7f83eb5c0b
--- /dev/null
+++ b/test/test_nc_chi_squared_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch non_central_chi_squared distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_chi_squared_quan_nvrtc_double.cpp b/test/test_nc_chi_squared_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..f2ca532622
--- /dev/null
+++ b/test/test_nc_chi_squared_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+extern "C" __global__
+void test_non_central_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_chi_squared_quan_nvrtc_float.cpp b/test/test_nc_chi_squared_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..931a1aa60e
--- /dev/null
+++ b/test/test_nc_chi_squared_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_chi_squared.hpp>
+extern "C" __global__
+void test_non_central_chi_squared_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_chi_squared_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_chi_squared_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_chi_squared_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::non_central_chi_squared_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_f.cpp b/test/test_nc_f.cpp
index 1cf411516d..7de8b254d5 100644
--- a/test/test_nc_f.cpp
+++ b/test/test_nc_f.cpp
@@ -7,7 +7,9 @@
 // (See accompanying file LICENSE_1_0.txt
 // or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch.hpp>
+#endif
 
 #ifdef _MSC_VER
 #pragma warning (disable:4127 4512)
@@ -20,8 +22,12 @@
 # define TEST_REAL_CONCEPT
 #endif
 
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #include <boost/math/distributions/chi_squared.hpp> // for chi_squared_distribution
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // for test_main
diff --git a/test/test_nc_f_cdf_double.cu b/test/test_nc_f_cdf_double.cu
new file mode 100644
index 0000000000..a4e4c442ff
--- /dev/null
+++ b/test/test_nc_f_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch non_central_f distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_f_cdf_float.cu b/test/test_nc_f_cdf_float.cu
new file mode 100644
index 0000000000..79a7d84144
--- /dev/null
+++ b/test/test_nc_f_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch non_central_f distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_f_cdf_nvrtc_double.cpp b/test/test_nc_f_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..1f35ba2ec4
--- /dev/null
+++ b/test/test_nc_f_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_f.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_f.hpp>
+extern "C" __global__
+void test_non_central_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_f_cdf_nvrtc_float.cpp b/test/test_nc_f_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..4aaa8b3910
--- /dev/null
+++ b/test/test_nc_f_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_f.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_f.hpp>
+extern "C" __global__
+void test_non_central_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_f_pdf_double.cu b/test/test_nc_f_pdf_double.cu
new file mode 100644
index 0000000000..3a156ffb71
--- /dev/null
+++ b/test/test_nc_f_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch non_central_f distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_f_pdf_float.cu b/test/test_nc_f_pdf_float.cu
new file mode 100644
index 0000000000..8a792b2f7e
--- /dev/null
+++ b/test/test_nc_f_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        int numElements = 50000;
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        cuda_managed_ptr<float_type> input_vector1(numElements);
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<float_type> output_vector(numElements);
+
+        boost::random::mt19937 gen;
+        boost::random::uniform_real_distribution<float_type> dist;
+        // Initialize the input vectors
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector1[i] = dist(gen);
+        }
+
+        // Launch the CUDA kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch non_central_f distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<float_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+            results.push_back(pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+        double t = w.elapsed();
+        // Check the results
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_f_pdf_nvrtc_double.cpp b/test/test_nc_f_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..bb5da728e9
--- /dev/null
+++ b/test/test_nc_f_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_f.hpp>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/non_central_f.hpp>
+extern "C" __global__
+void test_non_central_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_f_pdf_nvrtc_float.cpp b/test/test_nc_f_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..db46004729
--- /dev/null
+++ b/test/test_nc_f_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_non_central_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_f_quan_double.cu b/test/test_nc_f_quan_double.cu
new file mode 100644
index 0000000000..687f789616
--- /dev/null
+++ b/test/test_nc_f_quan_double.cu
@@ -0,0 +1,117 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_f distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    bool failed = false;
+    for(int i = 0; i < numElements; ++i)
+    {
+        // Nearly all values are within 150 eps, but there are outliers that hit ~100'000 eps.
+        // Typically this occurs around 0 on device with doubles;
+        // floats do not have this issue.
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100000.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            failed = true;
+        }
+    }
+
+    if (failed)
+        return EXIT_FAILURE;
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_f_quan_float.cu b/test/test_nc_f_quan_float.cu
new file mode 100644
index 0000000000..e2cd182aed
--- /dev/null
+++ b/test/test_nc_f_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch non_central_f distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 300.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_nc_f_quan_nvrtc_double.cpp b/test/test_nc_f_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..c2a6acd553
--- /dev/null
+++ b/test/test_nc_f_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_non_central_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
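+        // Grid geometry below rounds up: with numElements = 5000 and a block size
+        // of 256, numBlocks = (5000 + 255) / 256 = 20 blocks (5120 threads); the
+        // i < numElements guard in the kernel masks off the excess threads.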
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nc_f_quan_nvrtc_float.cpp b/test/test_nc_f_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..2e2aefc5e5
--- /dev/null
+++ b/test/test_nc_f_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/non_central_f.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_non_central_f_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_non_central_f_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_non_central_f_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_non_central_f_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::non_central_f_distribution<float_type>(0.5, 0.5, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_negative_binomial.cpp b/test/test_negative_binomial.cpp
index 069ebf8798..69f42b4a4b 100644
--- a/test/test_negative_binomial.cpp
+++ b/test/test_negative_binomial.cpp
@@ -26,9 +26,13 @@
 # define TEST_REAL_CONCEPT
 #endif
 
-#include <boost/math/concepts/real_concept.hpp> // for real_concept
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
 using ::boost::math::concepts::real_concept;
+#endif
 
 #include <boost/math/distributions/negative_binomial.hpp> // for negative_binomial_distribution
 using boost::math::negative_binomial_distribution;
diff --git a/test/test_negative_binomial_cdf_double.cu b/test/test_negative_binomial_cdf_double.cu
new file mode 100644
index 0000000000..6c4ae4e07d
--- /dev/null
+++ b/test/test_negative_binomial_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
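+    // The <<<...>>> launch is asynchronous; synchronizing here ensures the elapsed
+    // time below covers the kernel itself and that cudaGetLastError reports any
+    // launch failure.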
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch negative_binomial distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_negative_binomial_cdf_float.cu b/test/test_negative_binomial_cdf_float.cu
new file mode 100644
index 0000000000..0f5849474b
--- /dev/null
+++ b/test/test_negative_binomial_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch negative_binomial distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_negative_binomial_cdf_nvrtc_double.cpp b/test/test_negative_binomial_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..1cb7af6663
--- /dev/null
+++ b/test/test_negative_binomial_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_negative_binomial_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_negative_binomial_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_negative_binomial_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
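+        // The two option sets differ only in the include path: the CI branch points
+        // at the GitHub runner's boost-root checkout, the other at a local
+        // development tree; compute_75 (Turing) is pinned only for the CI run.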
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_negative_binomial_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_negative_binomial_cdf_nvrtc_float.cpp b/test/test_negative_binomial_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..f0d1b16476
--- /dev/null
+++ b/test/test_negative_binomial_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_negative_binomial_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_negative_binomial_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_negative_binomial_kernel");
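+        // The kernel is declared extern "C" in the source string above, so its
+        // symbol is unmangled and cuModuleGetFunction can look it up by name below.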
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_negative_binomial_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_negative_binomial_pdf_double.cu b/test/test_negative_binomial_pdf_double.cu
new file mode 100644
index 0000000000..16bd2ee487
--- /dev/null
+++ b/test/test_negative_binomial_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch negative_binomial distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_negative_binomial_pdf_float.cu b/test/test_negative_binomial_pdf_float.cu
new file mode 100644
index 0000000000..d9965c3050
--- /dev/null
+++ b/test/test_negative_binomial_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <exception>
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch negative_binomial distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << " eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_negative_binomial_pdf_nvrtc_double.cpp b/test/test_negative_binomial_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..395f9a30e5
--- /dev/null
+++ b/test/test_negative_binomial_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/negative_binomial.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_negative_binomial_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_negative_binomial_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_negative_binomial_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_negative_binomial_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
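+        // args holds the address of each kernel parameter; the driver reads the
+        // pointed-to values at launch, so these variables must remain alive until
+        // cuLaunchKernel returns.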
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_negative_binomial_pdf_nvrtc_float.cpp b/test/test_negative_binomial_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..ad20351fcd
--- /dev/null
+++ b/test/test_negative_binomial_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_negative_binomial_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::negative_binomial_distribution(1, 0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_negative_binomial_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_negative_binomial_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_negative_binomial_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::negative_binomial_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_negative_binomial_quan_double.cu b/test/test_negative_binomial_quan_double.cu
new file mode 100644
index 0000000000..2ecabc86c7
--- /dev/null
+++ b/test/test_negative_binomial_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::negative_binomial_distribution(1, 0.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch negative_binomial distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::negative_binomial_distribution(1, 0.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_negative_binomial_quan_float.cu b/test/test_negative_binomial_quan_float.cu new file mode 100644 index 0000000000..e9a3aece3a --- /dev/null +++ b/test/test_negative_binomial_quan_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::negative_binomial_distribution(1, 0.5), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch negative_binomial distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::negative_binomial_distribution(1, 0.5), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_negative_binomial_quan_nvrtc_double.cpp b/test/test_negative_binomial_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..c63342ba66 --- /dev/null +++ b/test/test_negative_binomial_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_negative_binomial_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::negative_binomial_distribution(1, 0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_negative_binomial_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_negative_binomial_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_negative_binomial_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::negative_binomial_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_negative_binomial_quan_nvrtc_float.cpp b/test/test_negative_binomial_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..6d7e98839f
--- /dev/null
+++ b/test/test_negative_binomial_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_negative_binomial_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::negative_binomial_distribution(1, 0.5), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_negative_binomial_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_negative_binomial_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_negative_binomial_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::negative_binomial_distribution<float_type>(1, 0.5), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_nonfinite_io.cpp b/test/test_nonfinite_io.cpp
index b917549810..855843c715 100644
--- a/test/test_nonfinite_io.cpp
+++ b/test/test_nonfinite_io.cpp
@@ -14,8 +14,8 @@
 #define BOOST_TEST_MAIN
 #include
-#include // Similar to BOOST_CLOSE_FRACTION.
-#include // To create test strings like std::basic_string s = S_("0 -0");
+#include // Similar to BOOST_CLOSE_FRACTION.
+#include // To create test strings like std::basic_string s = S_("0 -0");
 #include
 #include
diff --git a/test/test_normal.cpp b/test/test_normal.cpp
index ef984d5e63..e68a1f82e3 100644
--- a/test/test_normal.cpp
+++ b/test/test_normal.cpp
@@ -15,7 +15,9 @@
 // From MathWorld--A Wolfram Web Resource.
 // http://mathworld.wolfram.com/NormalDistribution.html
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include // include directory /libs/math/src/tr1/ is needed.
+#endif
 
 #ifdef _MSC_VER
 # pragma warning (disable: 4127) // conditional expression is constant
@@ -23,15 +25,20 @@
 // and if (std::numeric_limits::has_quiet_NaN)
 #endif
 
-#include
+#include
+
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include // for real_concept
+#endif
+
 #define BOOST_TEST_MAIN
 #include // Boost.Test
 #include
 #include
 using boost::math::normal_distribution;
-#include
 #include "test_out_of_range.hpp"
 #include
diff --git a/test/test_normal_cdf_double.cu b/test/test_normal_cdf_double.cu
new file mode 100644
index 0000000000..cd99d49253
--- /dev/null
+++ b/test/test_normal_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::normal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch normal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::normal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!"
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_normal_cdf_float.cu b/test/test_normal_cdf_float.cu new file mode 100644 index 0000000000..c8e422f6e3 --- /dev/null +++ b/test/test_normal_cdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::normal_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch normal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::normal_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" 
<< std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_normal_cdf_nvrtc_double.cpp b/test/test_normal_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..934e18d434 --- /dev/null +++ b/test/test_normal_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_normal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::normal_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_normal_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_normal_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, 
&ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_normal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::normal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_normal_cdf_nvrtc_float.cpp b/test/test_normal_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..06a2351d2e
--- /dev/null
+++ b/test/test_normal_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_normal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::normal_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_normal_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_normal_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_normal_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] 
= static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::normal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_normal_pdf_double.cu b/test/test_normal_pdf_double.cu
new file mode 100644
index 0000000000..b318023318
--- /dev/null
+++ b/test/test_normal_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::normal_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch normal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::normal_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_normal_pdf_float.cu b/test/test_normal_pdf_float.cu new file mode 100644 index 0000000000..155278fede --- /dev/null +++ b/test/test_normal_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::normal_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch normal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::normal_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_normal_pdf_nvrtc_double.cpp b/test/test_normal_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..216e264dd1 --- /dev/null +++ b/test/test_normal_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_normal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::normal_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_normal_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_normal_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_normal_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::normal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n Serial: " << res
+                              << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_normal_pdf_nvrtc_float.cpp b/test/test_normal_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..669a1aad26
--- /dev/null
+++ b/test/test_normal_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_normal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::normal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_normal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_normal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_normal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::normal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
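Unlike the .cu tests, the NVRTC host programs above are ordinary C++ translation units: the kernel string is compiled at run time by nvrtcCompileProgram, so only the include paths passed in opts[] matter to the device code. A minimal sketch of a host build line, assuming a default CUDA install under /usr/local/cuda and the boost-root checkout used by this CI (both paths are assumptions, not taken from this patch):

    g++ -std=c++14 test_normal_pdf_nvrtc_float.cpp \
        -I/usr/local/cuda/include -I../boost-root/libs/cuda-math/include \
        -L/usr/local/cuda/lib64 -lnvrtc -lcuda -lcudart \
        -o test_normal_pdf_nvrtc_float

Linking against libnvrtc, libcuda, and libcudart mirrors the three headers the file pulls in first.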
diff --git a/test/test_normal_quan_double.cu b/test/test_normal_quan_double.cu
new file mode 100644
index 0000000000..ca7fea427f
--- /dev/null
+++ b/test/test_normal_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::normal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch normal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::normal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
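The .cu tests lean on two small local helpers, cuda_managed_ptr and watch, shipped alongside them in test/cuda_managed_ptr.hpp and test/stopwatch.hpp. Those headers are not part of this patch; the sketch below only illustrates the interface the tests rely on, assuming cudaMallocManaged-backed unified memory and a std::chrono stopwatch:

    // Sketch only -- not the shipped headers.
    #include <cuda_runtime.h>
    #include <chrono>
    #include <cstddef>

    template <class T>
    class cuda_managed_ptr
    {
        T* data_ = nullptr;
    public:
        explicit cuda_managed_ptr(std::size_t n)
        {
            // Unified memory: directly addressable from host and device.
            cudaMallocManaged(&data_, n * sizeof(T));
        }
        ~cuda_managed_ptr() { cudaFree(data_); }
        cuda_managed_ptr(const cuda_managed_ptr&) = delete;
        cuda_managed_ptr& operator=(const cuda_managed_ptr&) = delete;
        T* get() const { return data_; }
        T& operator[](std::size_t i) { return data_[i]; }
        const T& operator[](std::size_t i) const { return data_[i]; }
    };

    class watch
    {
        std::chrono::steady_clock::time_point start_ = std::chrono::steady_clock::now();
    public:
        void reset() { start_ = std::chrono::steady_clock::now(); }
        double elapsed() const
        {
            return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
        }
    };

Because the storage is managed, the host-side verification loop can read output_vector[i] directly after cudaDeviceSynchronize(), with no explicit cudaMemcpy.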
diff --git a/test/test_normal_quan_float.cu b/test/test_normal_quan_float.cu
new file mode 100644
index 0000000000..ca7fea427f
--- /dev/null
+++ b/test/test_normal_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::normal_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch normal distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::normal_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_normal_quan_nvrtc_double.cpp b/test/test_normal_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..f47650f708
--- /dev/null
+++ b/test/test_normal_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_normal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::normal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_normal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_normal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_normal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::normal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_normal_quan_nvrtc_float.cpp b/test/test_normal_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..d988472778
--- /dev/null
+++ b/test/test_normal_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/normal.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_normal_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::normal_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_normal_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_normal_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_normal_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::normal_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_out_of_range.hpp b/test/test_out_of_range.hpp
index a8c93576f4..8b7e723c07 100644
--- a/test/test_out_of_range.hpp
+++ b/test/test_out_of_range.hpp
@@ -8,8 +8,9 @@
 #ifndef BOOST_MATH_TEST_OUT_OF_RANGE_HPP
 #define BOOST_MATH_TEST_OUT_OF_RANGE_HPP
 
+#include <boost/math/tools/config.hpp>
 #include <boost/math/special_functions/next.hpp>
-#include <boost/math/tools/test.hpp>
+#include <boost/math/tools/assert.hpp>
 
 /*` check_out_of_range functions check that bad parameters
 passed to constructors and functions throw domain_error exceptions.
@@ -30,6 +31,11 @@ but does *not* check finite but out-of-range parameters to the constructor
 because these are specific to each distribution.
 */
 
+#if defined(BOOST_CHECK_THROW) && defined(BOOST_MATH_NO_EXCEPTIONS)
+# undef BOOST_CHECK_THROW
+# define BOOST_CHECK_THROW(x, y)
+#endif
+
 #ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable:4127)
@@ -48,60 +54,60 @@ void check_support(const Distro& d, bool Infinite = false)
      value_type m = (range(d).first == 0) ? -boost::math::tools::min_value<value_type>() : boost::math::float_prior(range(d).first);
      BOOST_MATH_ASSERT(m != range(d).first);
      BOOST_MATH_ASSERT(m < range(d).first);
-     BOOST_MATH_CHECK_THROW(pdf(d, m), std::domain_error);
-     BOOST_MATH_CHECK_THROW(cdf(d, m), std::domain_error);
-     BOOST_MATH_CHECK_THROW(cdf(complement(d, m)), std::domain_error);
+     BOOST_CHECK_THROW(pdf(d, m), std::domain_error);
+     BOOST_CHECK_THROW(cdf(d, m), std::domain_error);
+     BOOST_CHECK_THROW(cdf(complement(d, m)), std::domain_error);
   }
   if ((boost::math::isfinite)(range(d).second) && (range(d).second != boost::math::tools::max_value<value_type>()))
   { // If possible, check that a random variable value just more than the top of the supported range throws domain errors.
      value_type m = (range(d).second == 0) ? boost::math::tools::min_value<value_type>() : boost::math::float_next(range(d).second);
      BOOST_MATH_ASSERT(m != range(d).first);
      BOOST_MATH_ASSERT(m > range(d).first);
-     BOOST_MATH_CHECK_THROW(pdf(d, m), std::domain_error);
-     BOOST_MATH_CHECK_THROW(cdf(d, m), std::domain_error);
-     BOOST_MATH_CHECK_THROW(cdf(complement(d, m)), std::domain_error);
+     BOOST_CHECK_THROW(pdf(d, m), std::domain_error);
+     BOOST_CHECK_THROW(cdf(d, m), std::domain_error);
+     BOOST_CHECK_THROW(cdf(complement(d, m)), std::domain_error);
   }
   if (std::numeric_limits<value_type>::has_infinity)
   { // Infinity is available,
     if ((boost::math::isfinite)(range(d).second))
     { // and top of range doesn't include infinity,
       // check that using infinity throws domain errors.
-      BOOST_MATH_CHECK_THROW(pdf(d, std::numeric_limits<value_type>::infinity()), std::domain_error);
-      BOOST_MATH_CHECK_THROW(cdf(d, std::numeric_limits<value_type>::infinity()), std::domain_error);
-      BOOST_MATH_CHECK_THROW(cdf(complement(d, std::numeric_limits<value_type>::infinity())), std::domain_error);
+      BOOST_CHECK_THROW(pdf(d, std::numeric_limits<value_type>::infinity()), std::domain_error);
+      BOOST_CHECK_THROW(cdf(d, std::numeric_limits<value_type>::infinity()), std::domain_error);
+      BOOST_CHECK_THROW(cdf(complement(d, std::numeric_limits<value_type>::infinity())), std::domain_error);
     }
     if ((boost::math::isfinite)(range(d).first))
    { // and bottom of range doesn't include infinity,
      // check that using infinity throws domain_error exception.
-      BOOST_MATH_CHECK_THROW(pdf(d, -std::numeric_limits<value_type>::infinity()), std::domain_error);
-      BOOST_MATH_CHECK_THROW(cdf(d, -std::numeric_limits<value_type>::infinity()), std::domain_error);
-      BOOST_MATH_CHECK_THROW(cdf(complement(d, -std::numeric_limits<value_type>::infinity())), std::domain_error);
+      BOOST_CHECK_THROW(pdf(d, -std::numeric_limits<value_type>::infinity()), std::domain_error);
+      BOOST_CHECK_THROW(cdf(d, -std::numeric_limits<value_type>::infinity()), std::domain_error);
+      BOOST_CHECK_THROW(cdf(complement(d, -std::numeric_limits<value_type>::infinity())), std::domain_error);
    }
    // Check that using infinity with quantiles always throws domain_error exception.
-   BOOST_MATH_CHECK_THROW(quantile(d, std::numeric_limits<value_type>::infinity()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(d, -std::numeric_limits<value_type>::infinity()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(complement(d, std::numeric_limits<value_type>::infinity())), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(complement(d, -std::numeric_limits<value_type>::infinity())), std::domain_error);
+   BOOST_CHECK_THROW(quantile(d, std::numeric_limits<value_type>::infinity()), std::domain_error);
+   BOOST_CHECK_THROW(quantile(d, -std::numeric_limits<value_type>::infinity()), std::domain_error);
+   BOOST_CHECK_THROW(quantile(complement(d, std::numeric_limits<value_type>::infinity())), std::domain_error);
+   BOOST_CHECK_THROW(quantile(complement(d, -std::numeric_limits<value_type>::infinity())), std::domain_error);
   }
 }
 if(std::numeric_limits<value_type>::has_quiet_NaN)
 { // NaN is available.
-   BOOST_MATH_CHECK_THROW(pdf(d, std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(cdf(d, std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(cdf(complement(d, std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
-   BOOST_MATH_CHECK_THROW(pdf(d, -std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(cdf(d, -std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(cdf(complement(d, -std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(d, std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(d, -std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(complement(d, std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
-   BOOST_MATH_CHECK_THROW(quantile(complement(d, -std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
+   BOOST_CHECK_THROW(pdf(d, std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
+   BOOST_CHECK_THROW(cdf(d, std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
+   BOOST_CHECK_THROW(cdf(complement(d, std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
+   BOOST_CHECK_THROW(pdf(d, -std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
+   BOOST_CHECK_THROW(cdf(d, -std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
+   BOOST_CHECK_THROW(cdf(complement(d, -std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
+   BOOST_CHECK_THROW(quantile(d, std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
+   BOOST_CHECK_THROW(quantile(d, -std::numeric_limits<value_type>::quiet_NaN()), std::domain_error);
+   BOOST_CHECK_THROW(quantile(complement(d, std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
+   BOOST_CHECK_THROW(quantile(complement(d, -std::numeric_limits<value_type>::quiet_NaN())), std::domain_error);
 }
 // Check that using probability outside [0,1] with quantiles always throws domain_error exception.
-BOOST_MATH_CHECK_THROW(quantile(d, -1), std::domain_error);
-BOOST_MATH_CHECK_THROW(quantile(d, 2), std::domain_error);
-BOOST_MATH_CHECK_THROW(quantile(complement(d, -1)), std::domain_error);
-BOOST_MATH_CHECK_THROW(quantile(complement(d, 2)), std::domain_error);
+BOOST_CHECK_THROW(quantile(d, -1), std::domain_error);
+BOOST_CHECK_THROW(quantile(d, 2), std::domain_error);
+BOOST_CHECK_THROW(quantile(complement(d, -1)), std::domain_error);
+BOOST_CHECK_THROW(quantile(complement(d, 2)), std::domain_error);
 }
 
 // Four check_out_of_range versions for distributions with zero to 3 constructor parameters.
@@ -121,12 +127,12 @@ void check_out_of_range(typename Distro::value_type p1)
    check_support(d);
    if(std::numeric_limits<value_type>::has_infinity)
    {
-      BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity()), range(d).first), std::domain_error);
-      //  BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity()), range(d).second), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity()), range(d).first), std::domain_error);
+      //  BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity()), range(d).second), std::domain_error);
    }
    if(std::numeric_limits<value_type>::has_quiet_NaN)
    {
-      BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::quiet_NaN()), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::quiet_NaN()), range(d).first), std::domain_error);
    }
 }
 
@@ -138,13 +144,13 @@ void check_out_of_range(typename Distro::value_type p1, typename Distro::value_t
    check_support(d);
    if(std::numeric_limits<value_type>::has_infinity)
    {
-      BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity(), p2), range(d).first), std::domain_error);
-      BOOST_MATH_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::infinity()), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity(), p2), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::infinity()), range(d).first), std::domain_error);
    }
    if(std::numeric_limits<value_type>::has_quiet_NaN)
    {
-      BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::quiet_NaN(), p2), range(d).first), std::domain_error);
-      BOOST_MATH_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::quiet_NaN()), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::quiet_NaN(), p2), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::quiet_NaN()), range(d).first), std::domain_error);
    }
 }
 
@@ -156,15 +162,15 @@ void check_out_of_range(typename Distro::value_type p1, typename Distro::value_t
    check_support(d);
    if(std::numeric_limits<value_type>::has_infinity)
    {
-      BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity(), p2, p3), range(d).first), std::domain_error);
-      BOOST_MATH_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::infinity(), p3), range(d).first), std::domain_error);
-      BOOST_MATH_CHECK_THROW(pdf(Distro(p1, p2, std::numeric_limits<value_type>::infinity()), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::infinity(), p2, p3), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::infinity(), p3), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(p1, p2, std::numeric_limits<value_type>::infinity()), range(d).first), std::domain_error);
    }
    if(std::numeric_limits<value_type>::has_quiet_NaN)
    {
-      BOOST_MATH_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::quiet_NaN(), p2, p3), range(d).first), std::domain_error);
-      BOOST_MATH_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::quiet_NaN(), p3), range(d).first), std::domain_error);
-      BOOST_MATH_CHECK_THROW(pdf(Distro(p1, p2, std::numeric_limits<value_type>::quiet_NaN()), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(std::numeric_limits<value_type>::quiet_NaN(), p2, p3), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(p1, std::numeric_limits<value_type>::quiet_NaN(), p3), range(d).first), std::domain_error);
+      BOOST_CHECK_THROW(pdf(Distro(p1, p2, std::numeric_limits<value_type>::quiet_NaN()), range(d).first), std::domain_error);
    }
 }
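With BOOST_CHECK_THROW stubbed out to a no-op under BOOST_MATH_NO_EXCEPTIONS, the same test sources can compile for targets where exceptions are unavailable. A hypothetical caller, using the two-parameter overload since pareto_distribution takes a scale and a shape (the test-case name and parameter values here are illustrative only):

    #define BOOST_TEST_MAIN
    #include <boost/test/unit_test.hpp>
    #include <boost/math/distributions/pareto.hpp>
    #include "test_out_of_range.hpp"

    BOOST_AUTO_TEST_CASE(pareto_out_of_range)
    {
        // Exercises NaN/infinite constructor parameters and out-of-support
        // random variables; compiles to a no-op when exceptions are disabled.
        check_out_of_range<boost::math::pareto_distribution<double>>(1.0, 2.0);
    }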
diff --git a/test/test_owens_t.cpp b/test/test_owens_t.cpp
index 8c33e77f07..11389dd205 100644
--- a/test/test_owens_t.cpp
+++ b/test/test_owens_t.cpp
@@ -38,9 +38,9 @@ using boost::math::owens_t;
 #include <boost/math/tools/stats.hpp>
 #include <boost/math/tools/test.hpp>
 
-#include "libs/math/test/handle_test_result.hpp"
-#include "libs/math/test/table_type.hpp"
-#include "libs/math/test/functor.hpp"
+#include "handle_test_result.hpp"
+#include "table_type.hpp"
+#include "functor.hpp"
 
 #include "boost/math/tools/test_value.hpp"
 #include "test_owens_t.hpp"
diff --git a/test/test_owens_t.hpp b/test/test_owens_t.hpp
index 995446e5fd..a65ba4af94 100644
--- a/test/test_owens_t.hpp
+++ b/test/test_owens_t.hpp
@@ -41,30 +41,28 @@ void test_spots(RealType)
   using ::boost::math::normal_distribution;
   BOOST_MATH_STD_USING // ADL of std names.
 
-  if(std::numeric_limits<RealType>::digits && (std::numeric_limits<RealType>::digits < 100))
-  {
-
-    // Checks of six sub-methods T1 to T6.
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.0625L), static_cast<RealType>(0.25L)), static_cast<RealType>(3.89119302347013668966224771378e-2L), tolerance); // T1
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(6.5L), static_cast<RealType>(0.4375L)), static_cast<RealType>(2.00057730485083154100907167685E-11L), tolerance); // T2
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(7L), static_cast<RealType>(0.96875L)), static_cast<RealType>(6.39906271938986853083219914429E-13L), tolerance); // T3
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(4.78125L), static_cast<RealType>(0.0625L)), static_cast<RealType>(1.06329748046874638058307112826E-7L), tolerance); // T4
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(2.L), static_cast<RealType>(0.5L)), static_cast<RealType>(8.62507798552150713113488319155E-3L), tolerance); // T5
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(1.L), static_cast<RealType>(0.9999975L)), static_cast<RealType>(6.67418089782285927715589822405E-2L), tolerance); // T6
-    //BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(L), static_cast<RealType>(L)), static_cast<RealType>(L), tolerance);
-
-    // BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(L), static_cast<RealType>(L)), static_cast<RealType>(L), tolerance);
-
-    // Spots values using Mathematica
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(6.5L), static_cast<RealType>(0.4375L)), static_cast<RealType>(2.00057730485083154100907167684918851101649922551817956120806662022118024594547E-11L), tolerance);
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.4375L), static_cast<RealType>(6.5L)), static_cast<RealType>(0.16540130125449396247498691826626273249659241838438244251206819782787761751256L), tolerance);
+  // Checks of six sub-methods T1 to T6.
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.0625L), static_cast<RealType>(0.25L)), static_cast<RealType>(3.89119302347013668966224771378499505568e-2L), tolerance); // T1
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(6.5L), static_cast<RealType>(0.4375L)), static_cast<RealType>(2.00057730485083154100907167684918851101649922551817956120806662022118025e-11L), tolerance); // T2
+  if (boost::math::tools::digits<RealType>() < 100) // too large error for 128 bit long double
+     BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(7L), static_cast<RealType>(0.96875L)), static_cast<RealType>(6.3990627193898685308321991442891601376479719094145923322318222572484602e-13L), tolerance); // T3
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(4.78125L), static_cast<RealType>(0.0625L)), static_cast<RealType>(1.06329748046874638058307112826015825291136503488102191050906959246644943e-7L), tolerance); // T4
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(2.L), static_cast<RealType>(0.5L)), static_cast<RealType>(8.6250779855215071311348831915463718787564119039085429110080944948781288e-3L), tolerance); // T5
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(1.L), static_cast<RealType>(0.9999975L)), static_cast<RealType>(6.6741808978228592771558982240461689232406934240709035854119334966793020e-2L), tolerance); // T6
+  //BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(L), static_cast<RealType>(L)), static_cast<RealType>(L), tolerance);
+
+  // BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(L), static_cast<RealType>(L)), static_cast<RealType>(L), tolerance);
+
+  // Spots values using Mathematica
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(6.5L), static_cast<RealType>(0.4375L)), static_cast<RealType>(2.00057730485083154100907167684918851101649922551817956120806662022118024594547E-11L), tolerance);
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.4375L), static_cast<RealType>(6.5L)), static_cast<RealType>(0.16540130125449396247498691826626273249659241838438244251206819782787761751256L), tolerance);
+  if (boost::math::tools::digits<RealType>() < 100) // too large error for 128 bit long double
      BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(7.L), static_cast<RealType>(0.96875L)), static_cast<RealType>(6.39906271938986853083219914428916013764797190941459233223182225724846022843930e-13L), tolerance);
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.96875L), static_cast<RealType>(7.L)), static_cast<RealType>(0.08316748474602973770533230453272140919966614259525787470390475393923633179072L), tolerance);
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(4.78125L), static_cast<RealType>(0.0625L)), static_cast<RealType>(1.06329748046874638058307112826015825291136503488102191050906959246644942646701e-7L), tolerance);
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.0625L), static_cast<RealType>(4.78125L)), static_cast<RealType>(0.21571185819897989857261253680409017017649352928888660746045361855686569265171L), tolerance);
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(2.L), static_cast<RealType>(0.5L)), static_cast<RealType>(0.00862507798552150713113488319154637187875641190390854291100809449487812876461L), tolerance);
-    BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.5L), static_cast<RealType>(2L)), static_cast<RealType>(0.14158060365397839346662819588111542648867283386549027383784843786494855594607L), tolerance);
-  }
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.96875L), static_cast<RealType>(7.L)), static_cast<RealType>(0.08316748474602973770533230453272140919966614259525787470390475393923633179072L), tolerance);
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(4.78125L), static_cast<RealType>(0.0625L)), static_cast<RealType>(1.06329748046874638058307112826015825291136503488102191050906959246644942646701e-7L), tolerance);
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.0625L), static_cast<RealType>(4.78125L)), static_cast<RealType>(0.21571185819897989857261253680409017017649352928888660746045361855686569265171L), tolerance);
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(2.L), static_cast<RealType>(0.5L)), static_cast<RealType>(0.00862507798552150713113488319154637187875641190390854291100809449487812876461L), tolerance);
+  BOOST_CHECK_CLOSE_FRACTION(owens_t(static_cast<RealType>(0.5L), static_cast<RealType>(2L)), static_cast<RealType>(0.14158060365397839346662819588111542648867283386549027383784843786494855594607L), tolerance);
 
   // check basic properties
   BOOST_CHECK_EQUAL(owens_t(static_cast<RealType>(0.5L), static_cast<RealType>(2L)), owens_t(static_cast<RealType>(-0.5L), static_cast<RealType>(2L)));
   BOOST_CHECK_EQUAL(owens_t(static_cast<RealType>(0.5L), static_cast<RealType>(2L)), -owens_t(static_cast<RealType>(0.5L), static_cast<RealType>(-2L)));
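The spot values above can be reproduced outside the Boost.Test harness; a minimal sketch, reusing the T(2, 0.5) value quoted from Mathematica in the hunk above:

    #include <boost/math/special_functions/owens_t.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        // T(2, 0.5) spot value from the test above.
        const double expected = 0.00862507798552150713113488319154637187875641190390854291;
        const double computed = boost::math::owens_t(2.0, 0.5);
        std::cout << "owens_t(2, 0.5) = " << computed
                  << ", relative error = " << std::fabs(computed - expected) / expected << '\n';
    }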
diff --git a/test/test_pareto.cpp b/test/test_pareto.cpp
index 35a5bb0098..b59b93f189 100644
--- a/test/test_pareto.cpp
+++ b/test/test_pareto.cpp
@@ -24,15 +24,19 @@
 #  pragma warning(disable: 4100) // unreferenced formal parameter.
 #endif
 
-#include <boost/math/concepts/real_concept.hpp> // for real_concept
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
+#include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/distributions/pareto.hpp>
     using boost::math::pareto_distribution;
-#include <boost/math/tools/test.hpp>
 #include "test_out_of_range.hpp"
 #include <iostream>
@@ -47,11 +51,13 @@ void check_pareto(RealType scale, RealType shape, RealType x, RealType p, RealType q, RealType tol)
 {
    RealType logtol = tol * 10;
 
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
    BOOST_IF_CONSTEXPR (std::is_same<RealType, long double>::value || std::is_same<RealType, boost::math::concepts::real_concept>::value)
    {
       logtol *= 100;
    }
+   #endif
 
    BOOST_CHECK_CLOSE_FRACTION(
       ::boost::math::cdf(
diff --git a/test/test_pareto_cdf_double.cu b/test/test_pareto_cdf_double.cu
new file mode 100644
index 0000000000..94ca6618a2
--- /dev/null
+++ b/test/test_pareto_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch pareto distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::pareto_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_pareto_cdf_float.cu b/test/test_pareto_cdf_float.cu
new file mode 100644
index 0000000000..7778900a47
--- /dev/null
+++ b/test/test_pareto_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch pareto distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::pareto_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
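Every kernel launch in these tests sizes its grid with the same ceiling-division idiom, which is worth spelling out once:

    // Ceiling division: the smallest block count whose total thread count
    // covers numElements.
    int numElements     = 50000;
    int threadsPerBlock = 256;
    int blocksPerGrid   = (numElements + threadsPerBlock - 1) / threadsPerBlock; // = 196
    // 196 * 256 = 50176 threads are launched; the kernel's
    // "if (i < numElements)" guard retires the 176 surplus threads.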
diff --git a/test/test_pareto_cdf_nvrtc_double.cpp b/test/test_pareto_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..55e7ecbb0a
--- /dev/null
+++ b/test/test_pareto_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_pareto_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_pareto_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_pareto_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_pareto_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::pareto_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_pareto_cdf_nvrtc_float.cpp b/test/test_pareto_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..a9d4f62500
--- /dev/null
+++ b/test/test_pareto_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_pareto_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_pareto_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_pareto_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_pareto_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::pareto_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
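All of the verification loops in these tests express their tolerance in units of machine epsilon via boost::math::epsilon_difference from <boost/math/special_functions/relative_difference.hpp>. A small self-contained illustration:

    #include <boost/math/special_functions/relative_difference.hpp>
    #include <iostream>
    #include <limits>

    int main()
    {
        // epsilon_difference returns the relative difference between two
        // values scaled by machine epsilon for the argument type.
        float a = 1.0f;
        float b = 1.0f + 10 * std::numeric_limits<float>::epsilon();
        std::cout << boost::math::epsilon_difference(a, b) << '\n'; // prints ~10
    }

A threshold of 100 eps (or 300 eps for the NVRTC runs) therefore tolerates modest accumulated rounding differences between the device and host code paths without masking real defects.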
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch pareto distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::pareto_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_pareto_pdf_float.cu b/test/test_pareto_pdf_float.cu
new file mode 100644
index 0000000000..8dbd97311a
--- /dev/null
+++ b/test/test_pareto_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch pareto distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::pareto_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_pareto_pdf_nvrtc_double.cpp b/test/test_pareto_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..6533303cd6
--- /dev/null
+++ b/test/test_pareto_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_pareto_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_pareto_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_pareto_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_pareto_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::pareto_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_pareto_pdf_nvrtc_float.cpp b/test/test_pareto_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..a5d415ae4f
--- /dev/null
+++ b/test/test_pareto_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_pareto_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_pareto_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_pareto_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_pareto_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::pareto_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_pareto_quan_double.cu b/test/test_pareto_quan_double.cu
new file mode 100644
index 0000000000..1d2c47ef93
--- /dev/null
+++ b/test/test_pareto_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch pareto distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::pareto_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_pareto_quan_float.cu b/test/test_pareto_quan_float.cu
new file mode 100644
index 0000000000..410067ea5c
--- /dev/null
+++ b/test/test_pareto_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch pareto distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::pareto_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_pareto_quan_nvrtc_double.cpp b/test/test_pareto_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..59444ee351
--- /dev/null
+++ b/test/test_pareto_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_pareto_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_pareto_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_pareto_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_pareto_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::pareto_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_pareto_quan_nvrtc_float.cpp b/test/test_pareto_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..75eda6bee4
--- /dev/null
+++ b/test/test_pareto_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/pareto.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_pareto_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::pareto_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_pareto_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_pareto_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_pareto_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::pareto_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_poisson.cpp b/test/test_poisson.cpp
index 9b75ce162f..2a0bc4c499 100644
--- a/test/test_poisson.cpp
+++ b/test/test_poisson.cpp
@@ -23,19 +23,24 @@
 # pragma warning(disable: 4127) // conditional expression is constant.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #include <boost/math/distributions/poisson.hpp>
     using boost::math::poisson_distribution;
-#include <boost/math/concepts/real_concept.hpp> // for real_concept
 #include <boost/math/special_functions/gamma.hpp> // for (incomplete) gamma.
 //   using boost::math::qamma_Q;
 #include "table_type.hpp"
 #include "test_out_of_range.hpp"
+#include "../include_private/boost/math/tools/test.hpp"
 
 #include <iostream>
     using std::cout;
@@ -53,12 +58,12 @@ void test_spots(RealType)
   // guaranteed for type RealType, eg 6 for float, 15 for double,
   // expressed as a percentage (so -2) for BOOST_CHECK_CLOSE,
 
-  int decdigits = numeric_limits<RealType>::digits10;
+  int decdigits = std::numeric_limits<RealType>::digits10; // May be >15 for 80 and 128-bit FP types.
   if (decdigits <= 0)
   { // decdigits is not defined, for example real concept,
    // so assume precision of most test data is double (for example, MathCAD).
-    decdigits = numeric_limits<double>::digits10; // == 15 for 64-bit
+    decdigits = std::numeric_limits<double>::digits10; // == 15 for 64-bit
  }
  if (decdigits > 15 ) // numeric_limits<double>::digits10)
  { // 15 is the accuracy of the MathCAD test data.
@@ -106,6 +111,7 @@ void test_spots(RealType)
   using ::boost::math::pdf;
 
   // Check that bad arguments throw.
+  #ifndef BOOST_MATH_NO_EXCEPTIONS
   BOOST_MATH_CHECK_THROW(
   cdf(poisson_distribution<RealType>(static_cast<RealType>(0)), // mean zero is bad.
   static_cast<RealType>(0)),  // even for a good k.
@@ -155,6 +161,7 @@ void test_spots(RealType)
   quantile(complement(poisson_distribution<RealType>(static_cast<RealType>(1)),
   static_cast<RealType>(0))),  // bad probability.
   std::overflow_error);
+  #endif
 
   BOOST_CHECK_EQUAL(
   quantile(poisson_distribution<RealType>(static_cast<RealType>(1)),
@@ -559,6 +566,7 @@ BOOST_AUTO_TEST_CASE( test_main )
   // poisson mydudpoisson(0.); // throws (if BOOST_MATH_DOMAIN_ERROR_POLICY == throw_on_error).
 
+#ifndef BOOST_MATH_NO_EXCEPTIONS
 #ifndef BOOST_NO_EXCEPTIONS
   BOOST_MATH_CHECK_THROW(poisson mydudpoisson(-1), std::domain_error);// Mean must be > 0.
   BOOST_MATH_CHECK_THROW(poisson mydudpoisson(-1), std::logic_error);// Mean must be > 0.
@@ -570,7 +578,7 @@ BOOST_AUTO_TEST_CASE( test_main )
   // BOOST_MATH_CHECK_THROW(poisson mydudpoisson(-1), std::overflow_error); // fails the check
   // because overflow_error is unrelated - except from std::exception
   BOOST_MATH_CHECK_THROW(cdf(mypoisson, -1), std::domain_error); // k must be >= 0
-
+#endif
   BOOST_CHECK_EQUAL(mean(mypoisson), 4.);
   BOOST_CHECK_CLOSE(
   pdf(mypoisson, 2.), // k events = 2.
@@ -644,7 +652,7 @@ BOOST_AUTO_TEST_CASE( test_main )
   test_spots(0.0); // Test double.
 #endif
 #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
-  if (numeric_limits<long double>::digits10 > numeric_limits<double>::digits10)
+  if (std::numeric_limits<long double>::digits10 > std::numeric_limits<double>::digits10)
   { // long double is better than double (so not MSVC where they are same).
 #ifdef TEST_LDOUBLE
     test_spots(0.0L); // Test long double.
diff --git a/test/test_poisson_cdf_double.cu b/test/test_poisson_cdf_double.cu
new file mode 100644
index 0000000000..34ca74a622
--- /dev/null
+++ b/test/test_poisson_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch poisson distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::poisson_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_poisson_cdf_float.cu b/test/test_poisson_cdf_float.cu
new file mode 100644
index 0000000000..0c024f6692
--- /dev/null
+++ b/test/test_poisson_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch poisson distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::poisson_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+    std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
diff --git a/test/test_poisson_cdf_nvrtc_double.cpp b/test/test_poisson_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..81ef15adc6
--- /dev/null
+++ b/test/test_poisson_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_poisson_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_poisson_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_poisson_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_poisson_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::poisson_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_poisson_cdf_nvrtc_float.cpp b/test/test_poisson_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..8e53303873
--- /dev/null
+++ b/test/test_poisson_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+extern "C" __global__
+void test_poisson_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_poisson_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_poisson_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_poisson_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::poisson_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_poisson_pdf_double.cu b/test/test_poisson_pdf_double.cu
new file mode 100644
index 0000000000..616fe0ba4f
--- /dev/null
+++ b/test/test_poisson_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
diff --git a/test/test_poisson_pdf_double.cu b/test/test_poisson_pdf_double.cu
new file mode 100644
index 0000000000..616fe0ba4f
--- /dev/null
+++ b/test/test_poisson_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch poisson distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::poisson_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
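The launch configuration in these .cu tests is the standard ceiling division, so the final block covers the ragged tail. A quick check with the numbers used here:

    int numElements     = 50000;
    int threadsPerBlock = 256;
    int blocksPerGrid   = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    // blocksPerGrid == 196, and 196 * 256 = 50176 >= 50000, so every element
    // gets a thread; the "i < numElements" guard in the kernel discards the
    // 176 surplus threads in the last block.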
diff --git a/test/test_poisson_pdf_float.cu b/test/test_poisson_pdf_float.cu
new file mode 100644
index 0000000000..81ac558999
--- /dev/null
+++ b/test/test_poisson_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch poisson distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::poisson_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_poisson_pdf_nvrtc_double.cpp b/test/test_poisson_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..514963d638
--- /dev/null
+++ b/test/test_poisson_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_poisson_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::poisson_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_poisson_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_poisson_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_poisson_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::poisson_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_poisson_pdf_nvrtc_float.cpp b/test/test_poisson_pdf_nvrtc_float.cpp new file mode 100644 index 0000000000..9b79360094 --- /dev/null +++ b/test/test_poisson_pdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_poisson_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::poisson_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_poisson_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_poisson_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_poisson_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::poisson_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_poisson_quan_double.cu b/test/test_poisson_quan_double.cu new file mode 100644 index 0000000000..b1ef1a17c5 --- /dev/null +++ b/test/test_poisson_quan_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch poisson distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::poisson_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
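The 100-eps pass/fail criterion used by these tests measures the GPU/host discrepancy in units of machine epsilon at the magnitude of the values, not as an absolute difference. A rough model of what boost::math::epsilon_difference reports (the hypothetical approx_eps_difference below is only a sketch of the idea, not Boost's implementation):

    #include <algorithm>
    #include <cmath>
    #include <limits>

    template <class T>
    T approx_eps_difference(T a, T b)
    {
        // Relative difference rescaled by the machine epsilon of T:
        // a result of 100 means the values differ by roughly 100 ulps.
        T scale = std::max(std::fabs(a), std::fabs(b));
        if (scale == T(0)) return T(0);
        return std::fabs(a - b) / (scale * std::numeric_limits<T>::epsilon());
    }

This makes one threshold usable for both float and double builds, since the tolerance tracks the precision of the type under test.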
diff --git a/test/test_poisson_quan_float.cu b/test/test_poisson_quan_float.cu
new file mode 100644
index 0000000000..82a28bd882
--- /dev/null
+++ b/test/test_poisson_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/poisson.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::poisson_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch poisson distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::poisson_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_poisson_quan_nvrtc_double.cpp b/test/test_poisson_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..572d57a257
--- /dev/null
+++ b/test/test_poisson_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_poisson_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::poisson_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_poisson_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_poisson_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_poisson_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); 
+ h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::poisson_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_poisson_quan_nvrtc_float.cpp b/test/test_poisson_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..34bd1ea53c --- /dev/null +++ b/test/test_poisson_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_poisson_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::poisson_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_poisson_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_poisson_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_poisson_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::poisson_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at element: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_rayleigh.cpp b/test/test_rayleigh.cpp
index de92dfa848..0d4ebf2a80 100644
--- a/test/test_rayleigh.cpp
+++ b/test/test_rayleigh.cpp
@@ -13,10 +13,15 @@
 # pragma warning(disable: 4100) // unreferenced formal parameter.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #include <boost/math/distributions/rayleigh.hpp>
     using boost::math::rayleigh_distribution;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
@@ -36,11 +41,13 @@ void test_spot(RealType s, RealType x, RealType p, RealType q, RealType toleranc
 {
    RealType logtolerance = tolerance;
 
+   #ifndef BOOST_MATH_HAS_GPU_SUPPORT
    BOOST_IF_CONSTEXPR (std::is_same<RealType, long double>::value || std::is_same<RealType, boost::math::concepts::real_concept>::value)
    {
       logtolerance *= 100;
    }
+   #endif
 
    BOOST_CHECK_CLOSE(
       ::boost::math::cdf(
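The test_rayleigh.cpp hunks above show the recurring pattern this patch uses to adapt an existing Boost.Test runner to GPU-enabled builds: the host-only real_concept type is compiled out under BOOST_MATH_NO_REAL_CONCEPT_TESTS, and the widened log tolerance is applied only when no GPU support is configured. A condensed view of the pattern (the helper name adjusted_log_tolerance is illustrative, not from the patch):

    #include <type_traits>
    #include <boost/math/tools/config.hpp>

    #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
    #include <boost/math/concepts/real_concept.hpp> // host-only test type
    #endif

    template <class RealType>
    RealType adjusted_log_tolerance(RealType tolerance)
    {
    #ifndef BOOST_MATH_HAS_GPU_SUPPORT
        // Wide host-only types take a slower, less precise code path,
        // so their tolerance is relaxed by a factor of 100.
        BOOST_IF_CONSTEXPR (std::is_same<RealType, long double>::value
                            || std::is_same<RealType, boost::math::concepts::real_concept>::value)
        {
            tolerance *= 100;
        }
    #endif
        return tolerance;
    }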
diff --git a/test/test_rayleigh_cdf_double.cu b/test/test_rayleigh_cdf_double.cu
new file mode 100644
index 0000000000..d6056dcaf1
--- /dev/null
+++ b/test/test_rayleigh_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::rayleigh_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch rayleigh distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::rayleigh_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
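The .cu tests index cuda_managed_ptr storage directly from host code both before and after the kernel runs, which implies the helper is built on CUDA unified memory, so no explicit cudaMemcpy is needed. A minimal sketch of such a wrapper, assuming cudaMallocManaged underneath (the real cuda_managed_ptr.hpp helper used by these tests may differ):

    #include <cuda_runtime.h>

    template <class T>
    class managed_array // hypothetical stand-in for cuda_managed_ptr<T>
    {
        T* p_ {nullptr};
    public:
        explicit managed_array(int n) { cudaMallocManaged(&p_, n * sizeof(T)); }
        ~managed_array() { cudaFree(p_); }
        managed_array(const managed_array&) = delete;
        managed_array& operator=(const managed_array&) = delete;
        T* get() const { return p_; }   // the same pointer is valid on host and device
        T& operator[](int i) { return p_[i]; }
        const T& operator[](int i) const { return p_[i]; }
    };

With unified memory the host must still synchronize (cudaDeviceSynchronize) before reading results, which is exactly what the tests do after each launch.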
diff --git a/test/test_rayleigh_cdf_float.cu b/test/test_rayleigh_cdf_float.cu
new file mode 100644
index 0000000000..2c86ec1ba3
--- /dev/null
+++ b/test/test_rayleigh_cdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::rayleigh_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch rayleigh distribution kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::rayleigh_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_rayleigh_cdf_nvrtc_double.cpp b/test/test_rayleigh_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..f57595d195
--- /dev/null
+++ b/test/test_rayleigh_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_rayleigh_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::rayleigh_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_rayleigh_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_rayleigh_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_rayleigh_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); 
+ h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::rayleigh_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_rayleigh_cdf_nvrtc_float.cpp b/test/test_rayleigh_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..aeeaacadfb --- /dev/null +++ b/test/test_rayleigh_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_rayleigh_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::rayleigh_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_rayleigh_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_rayleigh_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_rayleigh_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::rayleigh_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_rayleigh_pdf_double.cu b/test/test_rayleigh_pdf_double.cu new file mode 100644 index 0000000000..b83ae3cbf0 --- /dev/null +++ b/test/test_rayleigh_pdf_double.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::rayleigh_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::rayleigh_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_rayleigh_pdf_float.cu b/test/test_rayleigh_pdf_float.cu new file mode 100644 index 0000000000..a5bfee42da --- /dev/null +++ b/test/test_rayleigh_pdf_float.cu @@ -0,0 +1,110 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
diff --git a/test/test_rayleigh_pdf_float.cu b/test/test_rayleigh_pdf_float.cu
new file mode 100644
index 0000000000..a5bfee42da
--- /dev/null
+++ b/test/test_rayleigh_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::rayleigh_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::rayleigh_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
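// Editor's note: the grid size used throughout these tests is the usual
// ceiling division, which guarantees blocksPerGrid * threadsPerBlock >=
// numElements, so every element is covered and the in-kernel bounds check
// trims the overshoot. A quick self-contained check of the identity
// (illustrative only, not part of this patch):
//
//     #include <cassert>
//     int main()
//     {
//         int numElements = 50000, threadsPerBlock = 256;
//         int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
//         assert(blocksPerGrid * threadsPerBlock >= numElements);        // covers all elements
//         assert((blocksPerGrid - 1) * threadsPerBlock < numElements);   // with no spare block
//         return 0;
//     }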
diff --git a/test/test_rayleigh_pdf_nvrtc_double.cpp b/test/test_rayleigh_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..17662bba07
--- /dev/null
+++ b/test/test_rayleigh_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_rayleigh_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::rayleigh_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_rayleigh_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_rayleigh_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_rayleigh_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::rayleigh_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
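// Editor's note: every *_nvrtc_* test in this patch follows the same
// pipeline: NVRTC compiles the kernel source string to PTX, the driver API
// loads the PTX as a module, and the kernel is fetched by its extern "C"
// name. A condensed sketch of that flow (error handling elided; see the
// check* helpers above for the real checks):
//
//     nvrtcProgram prog;
//     nvrtcCreateProgram(&prog, cuda_kernel, "kernel.cu", 0, nullptr, nullptr);
//     nvrtcCompileProgram(prog, numOpts, opts);            // source -> PTX
//     size_t n; nvrtcGetPTXSize(prog, &n);
//     std::string ptx(n, '\0');
//     nvrtcGetPTX(prog, &ptx[0]);
//     CUmodule mod;  cuModuleLoadDataEx(&mod, ptx.c_str(), 0, nullptr, nullptr);
//     CUfunction f;  cuModuleGetFunction(&f, mod, "test_rayleigh_kernel");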
diff --git a/test/test_rayleigh_pdf_nvrtc_float.cpp b/test/test_rayleigh_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..82cc534e8b
--- /dev/null
+++ b/test/test_rayleigh_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_rayleigh_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::rayleigh_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_rayleigh_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_rayleigh_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_rayleigh_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::rayleigh_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_rayleigh_quan_double.cu b/test/test_rayleigh_quan_double.cu
new file mode 100644
index 0000000000..65084e57cf
--- /dev/null
+++ b/test/test_rayleigh_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::rayleigh_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::rayleigh_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_rayleigh_quan_float.cu b/test/test_rayleigh_quan_float.cu
new file mode 100644
index 0000000000..7a03396646
--- /dev/null
+++ b/test/test_rayleigh_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::rayleigh_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::rayleigh_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
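// Editor's note: cuda_managed_ptr (from the test support header
// "cuda_managed_ptr.hpp", which is not part of this patch) owns CUDA unified
// memory, which is why the same pointer can be handed to the kernel and then
// read back on the host with no explicit copies. A minimal sketch of the
// idea, assuming only the CUDA runtime API (illustrative, not the actual
// helper):
//
//     #include <cstddef>
//     #include <cuda_runtime.h>
//
//     template <class T>
//     struct managed_buffer
//     {
//         T* p = nullptr;
//         explicit managed_buffer(std::size_t n) { cudaMallocManaged(&p, n * sizeof(T)); }
//         ~managed_buffer() { cudaFree(p); }
//         T* get() const { return p; }
//         T& operator[](std::size_t i) const { return p[i]; }
//     };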
diff --git a/test/test_rayleigh_quan_nvrtc_double.cpp b/test/test_rayleigh_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..c105b50470
--- /dev/null
+++ b/test/test_rayleigh_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_rayleigh_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::rayleigh_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_rayleigh_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_rayleigh_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_rayleigh_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::rayleigh_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_rayleigh_quan_nvrtc_float.cpp b/test/test_rayleigh_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..88c9418a32
--- /dev/null
+++ b/test/test_rayleigh_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <iostream>
+#include <iomanip>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/rayleigh.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_rayleigh_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::rayleigh_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_rayleigh_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_rayleigh_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_rayleigh_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::rayleigh_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
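// Editor's note: the "> 300" and "> 100.0" checks in these tests measure
// error in units of machine epsilon: boost::math::epsilon_difference(a, b)
// reports how far apart a and b are as a multiple of epsilon for the type.
// A hedged sketch of one common hand-rolled variant of such a check (not the
// exact Boost.Math definition):
//
//     #include <algorithm>
//     #include <cmath>
//     #include <limits>
//
//     bool within_eps(double a, double b, double max_eps)
//     {
//         // scale an absolute difference by the larger magnitude, then
//         // compare against max_eps machine epsilons
//         const double scale = std::max(std::fabs(a), std::fabs(b));
//         return std::fabs(a - b) <= max_eps * std::numeric_limits<double>::epsilon() * scale;
//     }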
diff --git a/test/test_round.cpp b/test/test_round.cpp
index 95ff4d234e..e603aa510d 100644
--- a/test/test_round.cpp
+++ b/test/test_round.cpp
@@ -3,12 +3,20 @@
 // Boost Software License, Version 1.0. (See accompanying file
 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch_light.hpp>
+#endif
+
+#ifdef __clang__
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion"
+#endif
 #include <boost/math/special_functions/round.hpp>
 #define BOOST_TEST_MAIN
+#include <boost/math/tools/config.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/test/unit_test.hpp>
-#include <boost/math/tools/test.hpp>
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/random/mersenne_twister.hpp>
 #include <boost/random/uniform_real_distribution.hpp>
@@ -222,6 +230,7 @@ void test_round(T, const char* name )
    //
    // Finish off by testing the error handlers:
    //
+   #ifndef BOOST_MATH_NO_EXCEPTIONS
    BOOST_MATH_CHECK_THROW(iround(static_cast<T>(1e20)), boost::math::rounding_error);
    BOOST_MATH_CHECK_THROW(iround(static_cast<T>(-1e20)), boost::math::rounding_error);
    BOOST_MATH_CHECK_THROW(lround(static_cast<T>(1e20)), boost::math::rounding_error);
@@ -314,6 +323,7 @@ void test_round(T, const char* name )
    BOOST_MATH_CHECK_THROW(llround(static_cast<T>((std::numeric_limits<long long>::min)()) - 1), boost::math::rounding_error);
 }
 #endif
+   #endif
 //
 // try non-throwing error handlers:
 //
diff --git a/test/test_round_double.cu b/test/test_round_double.cu
new file mode 100644
index 0000000000..3dae4342d2
--- /dev/null
+++ b/test/test_round_double.cu
@@ -0,0 +1,98 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in[i]) + boost::math::iround(in[i]) + boost::math::lround(in[i]) + boost::math::llround(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::round(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_round_float.cu b/test/test_round_float.cu
new file mode 100644
index 0000000000..45dd14c03a
--- /dev/null
+++ b/test/test_round_float.cu
@@ -0,0 +1,98 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in[i]) + boost::math::iround(in[i]) + boost::math::lround(in[i]) + boost::math::llround(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector addition of " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> h_A(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> h_C(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        h_A[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(4 * boost::math::round(h_A[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(h_C[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
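// Editor's note: the round tests above sum round/iround/lround/llround on the
// device but verify against 4 * round(x) on the host. That only works because
// the inputs are confined to [0, 1), where all four rounding overloads return
// the same value (0 or 1), representable in every result type. A small
// illustration of the identity being relied on:
//
//     double x = 0.75;   // any value in [0, 1)
//     double sum = boost::math::round(x) + boost::math::iround(x)
//                + boost::math::lround(x) + boost::math::llround(x);
//     // here sum == 4 * boost::math::round(x)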
diff --git a/test/test_round_nvrtc_double.cpp b/test/test_round_nvrtc_double.cpp
new file mode 100644
index 0000000000..228e3dd674
--- /dev/null
+++ b/test/test_round_nvrtc_double.cpp
@@ -0,0 +1,194 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_round_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in1[i]) +
+                 boost::math::lround(in1[i]) +
+                 boost::math::llround(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_round_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_round_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_round_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::round(h_in1[i]) +
+                       boost::math::lround(h_in1[i]) +
+                       boost::math::llround(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_round_nvrtc_float.cpp b/test/test_round_nvrtc_float.cpp
new file mode 100644
index 0000000000..8554add7cd
--- /dev/null
+++ b/test/test_round_nvrtc_float.cpp
@@ -0,0 +1,194 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <iostream>
+#include <random>
+#include <exception>
+#include <cstdlib>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/round.hpp>
+#include <boost/math/tools/config.hpp>
+extern "C" __global__
+void test_round_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::round(in1[i]) +
+                 boost::math::lround(in1[i]) +
+                 boost::math::llround(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_round_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_round_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_round_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::round(h_in1[i]) +
+                       boost::math::lround(h_in1[i]) +
+                       boost::math::llround(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_saspoint5.cpp b/test/test_saspoint5.cpp
new file mode 100644
index 0000000000..0703b3c547
--- /dev/null
+++ b/test/test_saspoint5.cpp
@@ -0,0 +1,987 @@
+// Copyright Takuma Yoshimura 2024.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_TEST_MAIN
+#define BOOST_TEST_MODULE StatsSaSpoint5Test
+#include <boost/test/unit_test.hpp>
+#include <boost/test/tools/floating_point_comparison.hpp>
+#include <boost/math/tools/big_constant.hpp>
+
+#include <boost/math/distributions/saspoint5.hpp>
+
+#if __has_include(<stdfloat>)
+#  include <stdfloat>
+#endif
+
+using boost::math::saspoint5_distribution;
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+#include <boost/multiprecision/cpp_bin_float.hpp>
+using boost::multiprecision::cpp_bin_float_quad;
+#endif
+
+template <typename RealType, int N>
+void do_test_saspoint5_pdf(){
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65057384221262866484014802392420311075288403543570e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.79329640523490376041131493419821198600076403419386e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95852304370396879516224023732198088002401888082616e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15165745993244539388932384769132623478818505719254e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37991930003932826612228434406591616554240049257806e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65315905048420909110038030496372707676321065455853e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98514782971078642902580240066249560381976244868252e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.39569569206548434200616180477229996876301302607033e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.91428580496513429479068747515164587814473831035141e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22747038893846641865142698258984886289652875016810e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.58664432176873307685856460747711890398474760309135e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.00211113907132159419276991035240896253998639909692e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.48740303942235865293078214071409846807501918299929e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.06062847902700317213276816720705695621399130112975e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.74654707859965380374478388584454386653113140787247e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.57987022193031994457557921317658430036146780413966e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.61071469126041183247373313827161939453635781053656e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.22226841107155902731102159116498022415283445125970e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.91402704796678469983705063964301389721348302634810e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.07020274682586388030767450697192728366834992164365e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16067678880616390663660038956460612725510361666743e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.26548452861398188870931570373322249360362213948984e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38813584257594010463041381178865326769684234839162e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.53335207865128979807776513257284069794567700115319e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70762401725206223811383500786268939644546879037607e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.80837633035190650461893908063251050472045384954446e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.92009924039028830545431687288769864540254439571691e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04458865222194329452641393707783622253522148720927e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18403802814813998631607652585109350067000954260159e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.34114858767964975455487653703370079404113295500766e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51927454673218629749392967500746624956301580234179e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72261384495736629760803695406845321684342258638651e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.95645445681747568731488283573032414811445124048278e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.08683711301287379599769504729521857091008674908403e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22747777451211671698876953149906906288207621283740e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.37948342659965914836420291897431271791569870739942e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.54408462624872191505363112311495187968578947455635e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.72263350586688041299651258469337654343439789762344e-1), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist, static_cast<RealType>(-0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.91658567295250072264436343404335704809676265611623e-1), tolerance);
4.12745086277094097472011430871049070050383274543942e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.35668401768623200524372663239480799018368629449958e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.60546312221207321659735213679473165482665805082365e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.87425051951658593562735199923143476116556674561953e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.16193612095139713576014676574899762241637086348666e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.46416716200748206779925127900698754118588044244443e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.77011156717390291621839083396306231100301720494794e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.05643252618763782827233685490846659295644445464474e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(-0.015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.27802677165093171536432271430054096075199851236277e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.36619772367581343075535053490057448137838582961826e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.27802677165093171536432271430054096075199851236277e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.05643252618763782827233685490846659295644445464474e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.77011156717390291621839083396306231100301720494794e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.46416716200748206779925127900698754118588044244443e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.16193612095139713576014676574899762241637086348666e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.87425051951658593562735199923143476116556674561953e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.60546312221207321659735213679473165482665805082365e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.35668401768623200524372663239480799018368629449958e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.12745086277094097472011430871049070050383274543942e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.91658567295250072264436343404335704809676265611623e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.72263350586688041299651258469337654343439789762344e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.54408462624872191505363112311495187968578947455635e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
3.37948342659965914836420291897431271791569870739942e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.22747777451211671698876953149906906288207621283740e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.08683711301287379599769504729521857091008674908403e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.95645445681747568731488283573032414811445124048278e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.72261384495736629760803695406845321684342258638651e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51927454673218629749392967500746624956301580234179e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.34114858767964975455487653703370079404113295500766e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18403802814813998631607652585109350067000954260159e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04458865222194329452641393707783622253522148720927e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.92009924039028830545431687288769864540254439571691e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.80837633035190650461893908063251050472045384954446e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70762401725206223811383500786268939644546879037607e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.53335207865128979807776513257284069794567700115319e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38813584257594010463041381178865326769684234839162e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.26548452861398188870931570373322249360362213948984e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16067678880616390663660038956460612725510361666743e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.07020274682586388030767450697192728366834992164365e-1), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.91402704796678469983705063964301389721348302634810e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.22226841107155902731102159116498022415283445125970e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.61071469126041183247373313827161939453635781053656e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.57987022193031994457557921317658430036146780413966e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.74654707859965380374478388584454386653113140787247e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.06062847902700317213276816720705695621399130112975e-2), 
tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.48740303942235865293078214071409846807501918299929e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.00211113907132159419276991035240896253998639909692e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.58664432176873307685856460747711890398474760309135e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22747038893846641865142698258984886289652875016810e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.91428580496513429479068747515164587814473831035141e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.39569569206548434200616180477229996876301302607033e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.98514782971078642902580240066249560381976244868252e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.65315905048420909110038030496372707676321065455853e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37991930003932826612228434406591616554240049257806e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15165745993244539388932384769132623478818505719254e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95852304370396879516224023732198088002401888082616e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.79329640523490376041131493419821198600076403419386e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.65057384221262866484014802392420311075288403543570e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41709229627396868333284301965586098495341505333984e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.23486804023715403906392442935982382364217653266772e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.08928004905124817201928007015908544910529569714342e-2), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.70693662562544123549117032234725245555978806476399e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.72521377242204601673499906339041894791681660981780e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.90118208003638158949103509569466271651423947601700e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.20122199033087670451578082577537221533524657616689e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.60044810497290557552736366450372523266254544297541e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(9)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.62568921883700157286068393009259359194911515759460e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(10)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
4.87225538372111615796471446753804828223647849403478e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.27521869884978603565623102805698827447589635270965e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.79233594205638037575121942287372720508125859480601e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.39503653350126602107748523248767674409207708020015e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.06336366766043497753645671560287680725185122482037e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.78299184327161285281451711213360177146934142134357e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.54339461777955741686401041938275102206830951452701e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.15681965291001751496355126714032815564803295371462e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85998635069315959390913560004569618374730778555584e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(22)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.62608332259613979271666391679559272984023328035102e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43781372306723685704327334057789272970358761256651e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28356349911089078230470059243188541053449454702148e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.15527095704472843921748976306796551576061214402975e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04718107132157571106811924450948266398728667364202e-3), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.55085695067883584460317653567009454037053097890033e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.07065059415060466523722913682664006023733154400679e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.93920671056491824666303660784084772546983031349823e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.05105752932345415433215445597062488386383300986381e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.33854958380909922933873661422920090995959934982337e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.75647800724537927733394825316181366013592737695448e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.27359757459764665859327891341356484542630879530500e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.86768631746691871635029152331878689826668170171440e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.52254909340415842254122300688608224784456787996634e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(72)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 2.96929220521963282370464091269016751022713088743640e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.54771289809160518824087477899271848785553979294988e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.21766390014321750130289247373333697019024113873128e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.95349215386026125880558883477711894508813402521643e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73811374936907293362277490498742346151511716286084e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55975461659552125943580943693269482505834989982945e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41006225150819455490222552628817791206744288460867e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28296324273402304808354336538549844914883481532620e-4), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(256)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.63180321475387230691526892287847669196997380044025e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(512)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.66189768326912251607424407981263646223230063365226e-5), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, static_cast(1024)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.93708011998897739786377954224930716533616609090213e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 11)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.11452967162743112245198615236484567129951718982008e-6), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 12)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.51482080833564338340856966996445313498079083789150e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 13)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.66663174159211670027542189178751103023898387998523e-7), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 14)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.45238046039194445466823557091765258247216574897413e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 15)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.34803516847185617806978815778928715951216310353916e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 16)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.18523956611649861747653810989133680515605456258881e-8), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 17)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.19428394481581920558479056092124822410228982935516e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 18)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48386121491270229946980639019289494044106626390164e-9), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 19)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.24863743761365464204033370116809431054495238229389e-10), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 20)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85627269567198410070703889816912945617135110666962e-10), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 21)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.56441321254434701892659687588069630508140502616495e-11), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 22)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 2.32124514546671241752437061147193743959791071682877e-11), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 23)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.20777750941928700289773286319085009877829580100394e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 24)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.90212173401654051439660118394241371322264056105160e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.02611352372467016891796952205325499928128114461973e-12), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 26)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.62800552193521241168029742332979605534754328327140e-13), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 27)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28273024622868202243706785685607188210679822726442e-13), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 28)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.53522776159999787175268870767580373267754737032134e-14), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 29)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.60346802350897629386145672448930505388450307742542e-14), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 30)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.66917274294807913528606004426646232913424627695599e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 31)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.00436953991320274750778716874695704836278902566390e-15), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 32)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.08655220551513998962815689594959332706015306211632e-16), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 34)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.85824418029941626524148561830736428793658611772587e-17), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 36)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10728389276235510083072832489179009106826041868534e-17), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 38)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38410697234730043795694534456878302211221269628364e-18), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 40)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73013503193177856102694285471291153594530345945373e-19), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 42)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.16266961272612513551592941445333504133461857377298e-20), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 44)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.70333753016489787852265460543736089676535476354358e-21), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 46)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.37917223411693427580584068017775560157489239248184e-22), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 48)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22396549352793655444565334974277876088154820156655e-23), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 50)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.27995699246102965377026386970340969776925728507708e-24), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 52)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.59994631904573126677044584818697892085174708564778e-25), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 54)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
8.24993294785056705165902599588868178759415543962830e-26), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 56)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03124162154653357770314062605026847259176035189343e-26), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 58)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28905202884892491063675049720005913844624989838025e-27), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 60)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61131503725850485091152332729977897011733755140179e-28), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 62)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.01414379732147400935170577498436447103578962194983e-29), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 64)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51767974711955685286218349809111390785355323964808e-30), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 68)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.93387460542241032622871979569574240917519314612402e-32), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 72)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.14667907118661876897809516737349031422411025903664e-34), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 76)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.60418604881272566803193825546858269428369981726767e-36), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 80)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.50065407013025533256374121391266749023560191365933e-37), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 84)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.34477198457980010827684344075634340204878641095256e-39), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 88)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.66370622590643616572397339764831428700168401193312e-41), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 92)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.72454097797900123415519497865404920881954749456838e-43), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 96)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.94459527809226549290322908699069488543719245241219e-45), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 100)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.39759301220191945453705676879390798465474357652431e-46), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 104)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18373908156550030836685714809779087770458087234145e-48), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 108)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.41209231494609468520317755439405311521523337977429e-50), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 112)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.33139424210327312273151307737011597702780857146510e-52), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 116)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.33030350328636432344828142894916983611711484679173e-54), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 120)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.30160992238849442824114913911543137689436308410131e-55), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 124)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.03376550373202254518240301652494105481741383790265e-57), tolerance); + BOOST_CHECK_CLOSE(pdf(dist, ldexp(static_cast(1), 128)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.17775859958128522725985138779564208904631966988869e-59), 
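+
+// BOOST_CHECK_CLOSE interprets its tolerance argument as a percentage, so the
+// epsilon * 100 * 3 above permits a relative error of three machine epsilons.
+// N is the binary precision forwarded to BOOST_MATH_BIG_CONSTANT. A minimal
+// usage sketch (hypothetical instantiations; the actual dispatch code is not
+// shown in this diff):
+//
+//   do_test_saspoint5_pdf<double, 53>();              // double: 53-bit significand
+//   do_test_saspoint5_pdf<cpp_bin_float_quad, 113>(); // quad: 113-bit significand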
+
+template <class RealType, int N>
+void do_test_saspoint5_cdf() {
+    //
+    // Basic sanity checks; tolerance is 5 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 5;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.63772802979087199762340235165979754525757604354946e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.68073422703516098355866522837852256596174289760926e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.72758003279298484112934948051066338979626419965674e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77889244858892026253876673752518849640459231269905e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.83545491848533781581075416061045687904524946502798e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.89826216632004296533051251749295777627320352902276e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.96860081561627764765290065134912151446894162470366e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04817161935798121677981868384882359833404907918648e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.13928162275383718405630406427822962549133076160983e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19012422407024648274998126765803685575224367865758e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.24515968592015292591213160991283529212368507398551e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.30502444449412087831256855850010593081747948385277e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37050277816586024016699127500442336442368833061485e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.44257500572741387471454711747681504603506105875273e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.52248669635662507718372623246751628200366838868998e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.61185078990430196469782036568483593931021694091782e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.71280312689343266367958859259591543958449635451967e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76849301027297680034801351032711397512725349083505e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.82824834763715399283433728056507088064285422281461e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.89261854625965316722646909500096608624627558333255e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.96226648191670403196061310683376239616840103866983e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.03800118279447955882543375356729458148212335628057e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.12082296340545811234455810077892380496395042488318e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.21198706286302016964660139725707925695690953535621e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.31309550000758082761278726632760757082965788779064e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.36800649890180441699589040706573844882916813862746e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.42623318957604837956085530683032551753868317236287e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.48814554885607965224431388327096964758526950639963e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.55417563843010824151990624091505209837963398189766e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.62483167142022658662698467701475815382565060700827e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.70071604494776495147661422048036562690579901259931e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.78254859359158098510888509896172431097522748703865e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.87119665000174806422420129219814480076318292452197e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.91839717977573455659490354583900768691773107778703e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.96771371640645784475148651296909803541218278211201e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.01931503128208807344707441840522746424640776733482e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.07338815387886469109799016905025356718337763965952e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.13014030144742808960599738063482904351064602625491e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.18980066583859588646492309993087622866113891170991e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.25262169970425461499760639161103103516585531687363e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.31887921568009055676985827521151927561905069756334e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.38886999325100940692217415763934459424688336515323e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.46290440538810971901075463907912052110770078557001e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.54128928252324244858536397279015277684698852998387e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.62429173448713128429248489740624277005917720602166e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.71206593195666184191280277407391829224588075252830e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.80450931389242086819408627721643501783105413645046e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(-0.015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.90099745314478063577240966101000818037412904585595e-1), tolerance);
+    BOOST_CHECK_EQUAL(cdf(dist, static_cast<RealType>(0)), static_cast<RealType>(0.5));
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.09900254685521936422759033898999181962587095414405e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.19549068610757913180591372278356498216894586354954e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.28793406804333815808719722592608170775411924747170e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.37570826551286871570751510259375722994082279397834e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.45871071747675755141463602720984722315301147001613e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.53709559461189028098924536092087947889229921442999e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.61113000674899059307782584236065540575311663484677e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.68112078431990944323014172478848072438094930243666e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.74737830029574538500239360838896896483414468312637e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.81019933416140411353507690006912377133886108829009e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.86985969855257191039400261936517095648935397374509e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.92661184612113530890200983094974643281662236034048e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.98068496871791192655292558159477253575359223266518e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.03228628359354215524851348703090196458781721788799e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.08160282022426544340509645416099231308226892221297e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.12880334999825193577579870780185519923681707547803e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.21745140640841901489111490103827568902477251296135e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.29928395505223504852338577951963437309420098740069e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.37516832857977341337301532298524184617434939299173e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.44582436156989175848009375908494790162036601810234e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.51185445114392034775568611672903035241473049360037e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.57376681042395162043914469316967448246131682763713e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.63199350109819558300410959293426155117083186137254e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.68690449999241917238721273367239242917034211220936e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.78801293713697983035339860274292074304309046464379e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.87917703659454188765544189922107619503604957511682e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.96199881720552044117456624643270541851787664371943e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.03773351808329596803938689316623760383159896133017e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.10738145374034683277353090499903391375372441666745e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.17175165236284600716566271943492911935714577718539e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.23150698972702319965198648967288602487274650916495e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.28719687310656733632041140740408456041550364548033e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.38814921009569803530217963431516406068978305908218e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.47751330364337492281627376753248371799633161131002e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.55742499427258612528545288252318495396493894124727e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.62949722183413975983300872499557663557631166938515e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.69497555550587912168743144149989406918252051614723e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.75484031407984707408786839008716470787631492601449e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(1.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.80987577592975351725001873234196314424775632134242e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.86071837724616281594369593572177037450866923839017e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.95182838064201878322018131615117640166595092081352e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.03139918438372235234709934865087848553105837529634e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(2.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.10173783367995703466948748250704222372679647097724e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.16454508151466218418924583938954312095475053497202e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.22110755141107973746123326247481150359540768730095e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.27241996720701515887065051948933661020373580034326e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(3.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.31926577296483901644133477162147743403825710239074e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.36227197020912800237659764834020245474242395645054e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(4.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.43871020523024507614851721583135267180360280594143e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.50483092818015575884212092733928976209364127755594e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(5.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.56280494417603102056412672973803628491156640138235e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.61420731826426116856340074125786936047174397075840e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(6.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.66021349395697519291462973006173132761227963124588e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.70172157747564156568159926049840862439194389470164e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(7.5)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.73943162478672589326613255747804240803057053984280e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist, static_cast<RealType>(8)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.77389877435125713385213180379500898219171021326151e-1), tolerance);
+}
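+
+// In the complemented checks below, cdf(complement(dist, x)) evaluates the
+// upper tail 1 - CDF(x) directly; far into the tail this avoids the
+// cancellation that the naive 1 - cdf(dist, x) would suffer.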
+
+template <class RealType, int N>
+void do_test_saspoint5_ccdf() {
+    //
+    // Basic sanity checks; tolerance is 5 epsilon,
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 5;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.86071837724616281594369593572177037450866923839017e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.80987577592975351725001873234196314424775632134242e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.75484031407984707408786839008716470787631492601449e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.69497555550587912168743144149989406918252051614723e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.62949722183413975983300872499557663557631166938515e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.55742499427258612528545288252318495396493894124727e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.47751330364337492281627376753248371799633161131002e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.38814921009569803530217963431516406068978305908218e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.28719687310656733632041140740408456041550364548033e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.23150698972702319965198648967288602487274650916495e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.17175165236284600716566271943492911935714577718539e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.10738145374034683277353090499903391375372441666745e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.03773351808329596803938689316623760383159896133017e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.96199881720552044117456624643270541851787664371943e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.87917703659454188765544189922107619503604957511682e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.78801293713697983035339860274292074304309046464379e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.68690449999241917238721273367239242917034211220936e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.46875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.63199350109819558300410959293426155117083186137254e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.57376681042395162043914469316967448246131682763713e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.40625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.51185445114392034775568611672903035241473049360037e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.44582436156989175848009375908494790162036601810234e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.34375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.37516832857977341337301532298524184617434939299173e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.29928395505223504852338577951963437309420098740069e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.28125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.21745140640841901489111490103827568902477251296135e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.12880334999825193577579870780185519923681707547803e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.234375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.08160282022426544340509645416099231308226892221297e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.21875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.03228628359354215524851348703090196458781721788799e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.203125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.98068496871791192655292558159477253575359223266518e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.92661184612113530890200983094974643281662236034048e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.171875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.86985969855257191039400261936517095648935397374509e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.15625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.81019933416140411353507690006912377133886108829009e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.140625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.74737830029574538500239360838896896483414468312637e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.68112078431990944323014172478848072438094930243666e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.109375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.61113000674899059307782584236065540575311663484677e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.09375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.53709559461189028098924536092087947889229921442999e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.078125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.45871071747675755141463602720984722315301147001613e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.37570826551286871570751510259375722994082279397834e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.046875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.28793406804333815808719722592608170775411924747170e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.03125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.19549068610757913180591372278356498216894586354954e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(-0.015625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.09900254685521936422759033898999181962587095414405e-1), tolerance);
+    BOOST_CHECK_EQUAL(cdf(complement(dist, static_cast<RealType>(0))), static_cast<RealType>(0.5));
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.015625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.90099745314478063577240966101000818037412904585595e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.03125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.80450931389242086819408627721643501783105413645046e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.046875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.71206593195666184191280277407391829224588075252830e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.0625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.62429173448713128429248489740624277005917720602166e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.078125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.54128928252324244858536397279015277684698852998387e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.09375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.46290440538810971901075463907912052110770078557001e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.109375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.38886999325100940692217415763934459424688336515323e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.31887921568009055676985827521151927561905069756334e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.140625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.25262169970425461499760639161103103516585531687363e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.15625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.18980066583859588646492309993087622866113891170991e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.171875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.13014030144742808960599738063482904351064602625491e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.1875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.07338815387886469109799016905025356718337763965952e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.203125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.01931503128208807344707441840522746424640776733482e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.21875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.96771371640645784475148651296909803541218278211201e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.234375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.91839717977573455659490354583900768691773107778703e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.87119665000174806422420129219814480076318292452197e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.28125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.78254859359158098510888509896172431097522748703865e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.3125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.70071604494776495147661422048036562690579901259931e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.34375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.62483167142022658662698467701475815382565060700827e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.55417563843010824151990624091505209837963398189766e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.40625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.48814554885607965224431388327096964758526950639963e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.4375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.42623318957604837956085530683032551753868317236287e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.46875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.36800649890180441699589040706573844882916813862746e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.31309550000758082761278726632760757082965788779064e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.5625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.21198706286302016964660139725707925695690953535621e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.12082296340545811234455810077892380496395042488318e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.6875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.03800118279447955882543375356729458148212335628057e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.96226648191670403196061310683376239616840103866983e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.8125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.89261854625965316722646909500096608624627558333255e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.82824834763715399283433728056507088064285422281461e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(0.9375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76849301027297680034801351032711397512725349083505e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.71280312689343266367958859259591543958449635451967e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.125))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.61185078990430196469782036568483593931021694091782e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.52248669635662507718372623246751628200366838868998e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.375))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.44257500572741387471454711747681504603506105875273e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37050277816586024016699127500442336442368833061485e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.625))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.30502444449412087831256855850010593081747948385277e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.24515968592015292591213160991283529212368507398551e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1.875))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19012422407024648274998126765803685575224367865758e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.13928162275383718405630406427822962549133076160983e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.04817161935798121677981868384882359833404907918648e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.96860081561627764765290065134912151446894162470366e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(2.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.89826216632004296533051251749295777627320352902276e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.83545491848533781581075416061045687904524946502798e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.77889244858892026253876673752518849640459231269905e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.72758003279298484112934948051066338979626419965674e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(3.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.68073422703516098355866522837852256596174289760926e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.63772802979087199762340235165979754525757604354946e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(4.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.56128979476975492385148278416864732819639719405857e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.49516907181984424115787907266071023790635872244406e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(5.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.43719505582396897943587327026196371508843359861765e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38579268173573883143659925874213063952825602924160e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(6.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.33978650604302480708537026993826867238772036875412e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.29827842252435843431840073950159137560805610529836e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(7.5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.26056837521327410673386744252195759196942946015720e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22610122564874286614786819620499101780828978673850e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(9))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.16519065397529967010603916749661994058495038701903e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.11285389689081092090080900757256655213056507935765e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06722664628240641657463412013080864534542465320880e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.02697048057896404675748265840787538239835533463915e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.91095580948192894243059119812739170064298855954150e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.58851606064421509151228732938834025845710632753865e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.29657717833196446423749763633852786438048712229891e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.03056141356415128156562790092782169373827214514422e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.56233191903709448828683239881723764461808662475751e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.16188637156340219345609662100276117883253664060901e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.81416233095397134117651222065771104114974953207875e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.50842302656587649324679534871796140718152459081608e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.23677655844367661796208750471724232185503310585430e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.99327211922253387063577514052912042212460501121145e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.77332469896087134873409606852930319306153102107748e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.57333571766941514095434647381791053890536948900264e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.22230201122276543582018268125914307770458422539453e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.92306196750274052986046861933822660319647411503460e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.66393766871935144778495176368042799313533760396735e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.43664512355365430998504793608427289643995825810352e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.23512057902480812783067842173364430254180391574517e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.05480824780342286194892111115606787981112335506282e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.89220914007246019306457129229565668476439227449653e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.74458482861654721048545411630851774084511858223315e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.48596753801598503319767014729339663622931424118435e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.26600858175294196667322269091614319302918711248774e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.07590501853150263567490077970815440178521221113294e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.90943298811551585430796926339912286222583383628380e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.76204970249877025738751243150107820229031119377257e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.63035057455899788326457066144517427412403198177255e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.51172649975372596610965298504076329424502797908092e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.40413948757894194852726127978788270443438529203793e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.43203077043778334466271831424077221758122536074329e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(512))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.73229441904414627006839605354679934626132938387870e-2), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, static_cast<RealType>(1024))), BOOST_MATH_BIG_CONSTANT(RealType, N,
1.23125360026260788506665554837055341098621632464561e-2), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 11))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.73810855742053432677942037710993481024582345070051e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.19474376202028990304398162068685909212949561120262e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 13))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.38834786517841980543748022239405103577545566369058e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.10703837128619218169124143528829380601241157169895e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 15))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.19901414203078529527146967144846039577487804212104e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.55594175267868082494169767822936567533407971376816e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 17))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.10071922510385832628139939726551776872500893306030e-3), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 18))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.78577261178721830451003518231079692441875499186244e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 19))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.50662913793980803259595258646776816982578828753293e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.89440319684962022373915418699465975439163763242915e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 21))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.75407315047618220330559697044875151329985165407760e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 22))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.94758093727122517797618473870421858045294374184298e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 23))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.37722626166484277233388378972371910745163900023301e-4), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 24))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.73885317860649699113798763978865018837655503068780e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.88660557570383272842403146191017664814974769956447e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 26))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.86966373050763507830869601193566593204764987942763e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 27))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.44342136111091391837428138940493220527760482550819e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 28))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.43489115282263847728003495903341128041631655237825e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 29))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.72174032467196796417055336934387675311978746399112e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 30))), BOOST_MATH_BIG_CONSTANT(RealType, N, 
1.21746039858699001180528503217054534969400689600469e-5), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 31))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.60877573465331935753972340571396643508918647400950e-6), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 32))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.08733904872845891588154720869252063263709578283331e-6), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 34))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.04367878835689816266760026397728945536920490441769e-6), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 36))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52184171018215269383027201178304809813110038062619e-6), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 38))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.60921434092694304578619454290395143640352517265958e-7), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 40))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.80460861796838148522601893634284307293180095878460e-7), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 42))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90230467086052636671801670850090200893179111799503e-7), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 44))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.51152425899360606075482273396798501430313790237214e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 46))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.75576235566956348303137380307065627552780450075955e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 48))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.37788123437797396666202471539368169920940171968838e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 50))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.18894063132478530361545271176786170012121225204887e-8), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 52))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.94470319196342264878568431977547823579033279922390e-9), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 54))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.97235160481658539831961233459162678510644673997599e-9), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 56))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48617580461701122279770683403051652704243983949025e-9), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 58))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.43087902860685242952854599081275466963995418723234e-10), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 60))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.71543951568387529445493346979934712370552753482953e-10), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 62))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85771975818704991725083904340140685678094729181896e-10), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 64))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.28859879179803026143850997564073108091243061664589e-11), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 68))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.32214969811127894197045507227361880900936362117960e-11), tolerance); + BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast(1), 72))), BOOST_MATH_BIG_CONSTANT(RealType, N, 
5.80537424537930446531251492646463803649696387208569e-12), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 76))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.45134356135114531072734934087281275202470211834375e-12), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 80))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.62835890338181277331789748787296614195690507700714e-13), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 84))), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.07089725845700036860694806306823770374585029986489e-14), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 88))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.26772431461440436935874981470647001703932928470872e-14), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 92))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.66931078653610734665125754039671239552191451228014e-15), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 96))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.41732769663403286311621332289317114802389526570992e-15), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 100))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.54331924158508592432390764336465547336588796613693e-16), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 104))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.85829810396271716489312806849560623979223766702760e-17), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 108))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.21457452599067943835349195212917512291401637194850e-17), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 112))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.53643631497669868784011108970123778268444560432089e-18), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 116))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.38410907874417467770730159801145325661085046788106e-18), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 120))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.46027269686043669786030013601997303312653750436687e-19), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 124))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.65068174215109174689577917816952001659129997288134e-20), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 128))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.16267043553777293686425909692485421878259294596496e-20), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 136))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.35166902221110808556756707276211088178619562345223e-21), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 144))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.44793138881942553480799933647572308735410707938645e-23), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 152))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.27995711801214095925541775450484763569750411131894e-24), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 160))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.29997319875758809953465243130019854989301769477462e-25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 168))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.06248324922349256220915840763819709280743596647028e-26), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 176))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28905203076468285138072402969870025328294044417034e-27), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 184))), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.05657519227926782112952519535313715734583596556713e-29), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 192))), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.03535949517454238820595324747603340202581381400690e-30), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 200))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.14709968448408899262872077968737723090225341248585e-31), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 208))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.96693730280255562039295048730519109566688181166036e-32), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 216))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22933581425159726274559405456576710378996415685244e-33), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 224))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.68334883907248289215996284103605325376468341179834e-35), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 232))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.80209302442030180759997677564753362950438836016578e-36), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 240))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.00130814026268862974998548477970853195201855431423e-37), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 248))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.87581758766418039359374092798731783299781533977493e-38), tolerance);
+    BOOST_CHECK_CLOSE(cdf(complement(dist, ldexp(static_cast<RealType>(1), 256))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.17238599229011274599608807999207364564425192108311e-39), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_saspoint5_quantile_nearzero() {
+    //
+    // Basic sanity checks, tolerance is 4 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 4;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.03125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.52796721097108753422708089760626414214332697170320e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.0625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.56591342761460650504994018321276991271746451691815e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.09375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.47218526924747883249737601803820876470332879082284e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.64871492892195438064623231330402224448304864505255e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.15625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.49147047287704191198103513188406591369158240312651e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.1875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.83944503273842706198239703540854699099743899944658e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.21875)), BOOST_MATH_BIG_CONSTANT(RealType, N,
-1.88121977453918311545713678336944882698543050262508e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.25)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.28383277518932774280834618691095083936296378494548e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.28125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.91036143728268221909726623759154622815926077224027e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.3125)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.21997830440617027092737461125926863143124423787158e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.34375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.31665804788247227155363433182318290115115268840234e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.93390030300024509983060478023436359505370694478274e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.40625)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.90586867715715827365143687189250571789458607638023e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.4375)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.12403044179836064404720613683569098344014116798151e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.46875)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.11633396058323297953641398479658668551534240629654e-2), tolerance); + BOOST_CHECK_EQUAL(quantile(dist, static_cast(0.5)), static_cast(0)); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.50390625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.14052785123109810231692510348602868600338388040310e-3), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.23083326042460532375320968182992121472721389674838e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.51171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.85291421003937677871303076502465267120545270922005e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.48262870167331460723959759110260709264416995145257e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.51953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.12205802380990657930467930989682569659575359795940e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.77305954522603022893044033304410291628163249182374e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.52734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.43730759016907991505397132799991250199870494073045e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.53125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.11633396058323297953641398479658668551534240629654e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.53515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.81156327963950319747789851203804452183342931095156e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5390625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.52434225749394951914392360992042907273807357938464e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.54296875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.25596345497536099309066715771745797836543079457304e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.00768452063066360042362619764345937296973490700293e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.55078125)), 
BOOST_MATH_BIG_CONSTANT(RealType, N, 8.78074386337778547450756599814721014853736307848400e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.57637357991828558350746829889151355918095876787015e-2), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.55859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03958102936522630889618724151593050682836910322233e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12403044179836064404720613683569098344014116798151e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.56640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.21111282406179691889755301310617804156919563608730e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.30095831353205038001599004861145346134019254008840e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.57421875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.39370061391699662824182194486817335276363125501648e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.48947760819317620418221654282830443278848716355952e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.58203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.58843194157470428889620974461757508836357970580066e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.5859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.69071158647072305786338619311971877757990751767576e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.58984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.79647039926214909876835259387875129672714154918583e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.59375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.90586867715715827365143687189250571789458607638023e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.59765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.01907372223400262964766551738092686471437958693421e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.13626041895044652657368022843673732366221034591882e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.60546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.25761183081873425453317860727230431668344830552276e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.38331982156037948249435913385386178518604561255635e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.61328125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.51358570582737493855890017214322139090955378416415e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.64862093447749446836087591919859178171321397783436e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.62109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.78864781940101196225997767818887450385653350744028e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.93390030300024509983060478023436359505370694478274e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.62890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.08462477761234306446751675008291773968116735262918e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6328125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
3.24108096043381713255460633677010126705310806557902e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.63671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.40354282984953644763765300071916088017186215793874e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.57229962948879265758087216105113103390988022371525e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.64453125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.74765694682819607503814996847853363329594489637064e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6484375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.92993787373911729744782959203924101663354892145487e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.65234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.11948425704142844251280879133218574177911117813611e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.65625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.31665804788247227155363433182318290115115268840234e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.66015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.52184275961932281071152630025400546639852185492776e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.73544504485420224870167294387230375351476906471546e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.66796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.95789640337012040028773901682608788684052806162084e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.18965503395137956448285166074873893264110804474852e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.67578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.43120784446902465897667277460048286927884165223763e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.68307263618494854564544144712177429500269729880351e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.68359375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.94580048000377446115526240745459082802949048053747e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.21997830440617027092737461125926863143124423787158e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.69140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.50623171706258215659862497663103390044283888606793e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.6953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.80522808468923275824853139897350453182651092750420e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.69921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.11767989861122001397308542198440114248503523449816e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.44434845679017252964098546334886010522317885397184e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.70703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.78604789681373771559537778013045202059314559478553e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.14364961859827390982162302693689202778040961541097e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.71484375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
8.51808714040276392410190679405041748405632099066392e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.71875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.91036143728268221909726623759154622815926077224027e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.72265625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.32154681743453457545426144342359967492966930439508e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7265625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.75279739912087792579527536186650514912572075425034e-1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.73046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.02053542591698211050464719677242867419786774375833e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.06805533335867623214314225100786133910606465280065e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.73828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.11798341618055383937616799179262778846061718089667e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7421875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.17047495787852935040070049482482614635865914736031e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.74609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.22569764738178929583883189036464601993502267035808e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.75)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.28383277518932774280834618691095083936296378494548e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.75390625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34507656531879179894649466860868632699377270813348e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.40964166091862115574755023135179331832042209281055e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.76171875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.47775878407059212254277747469428558567366208287249e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.54967859343857147744530902523828155595548983145552e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.76953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.62567376708606422065437126777269201178590320302836e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.70604134209343637939962724226064374287142808011846e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.77734375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.79110534768099925573827334628551158224393429767853e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.78125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.88121977453918311545713678336944882698543050262508e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.78515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.97677193016829061858313922289253621707890698703331e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.7890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.07818623846627168681622781867868137142587154742683e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.79296875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.18592855185448213952914641315720989891399211112618e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.30051105624503972344131618629751440966474716683287e0), 
tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.80078125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.42249786355837299731799089007913700561632679250929e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.55251140382947389347453508427822893893843094122006e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.80859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.69123974986228153051487214813181004658778348371858e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.83944503273842706198239703540854699099743899944658e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.81640625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.99797313731027706223638793579547090613393780037951e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.16776490443789287489784195960321745577830473993117e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.82421875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.34986911285371709243134965339463237363883153982305e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.54545757031320957733016491289957363442390624403356e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.83203125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.75584271388037850609173432108976499052095906002965e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8359375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.98249820637230758503593250844352437505576086329541e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.83984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.22708312477451358281191586486620734769643769191868e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.84375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.49147047287704191198103513188406591369158240312651e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.84765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.77778092239392917026904003217947497405149113745180e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8515625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.08842290488213149257806983557860643076120157707468e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.85546875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.42614045478507551744169142637868564301201400254633e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.859375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.79407056051931174009446309499422812782537696684469e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.86328125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.19581224082198008577426461565974789703165385453255e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.63551016172006234258563545412611777992137495354692e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.87109375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.11795639223606059741678567601858859535625176228939e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.64871492892195438064623231330402224448304864505255e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.87890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.23427499059001976769144078614154782774804457875826e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, 
static_cast(0.8828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.88224092195729153863789585576784499265806702821498e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.88671875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.60156902856277721120026862113617294726340622812143e0), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.890625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.04028650542151264864039545865194654420659149847715e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.89453125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.12987606827387901549180798300970689324385087503567e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.8984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.23043939515379786765482759320371211340959481379476e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.90234375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.34380276304930757031534891808152959303504163146383e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.90625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.47218526924747883249737601803820876470332879082284e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.91015625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.61830428929231043118455947432627297107033412348157e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.78551541642651806413112158903680754166702207111037e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.91796875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.97800037388343052469605081868946702260361591387557e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.20102262690774549296442657868648686186513838548331e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.92578125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.46128002888129351171085541318961326325857070786293e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9296875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76739893404884608939800784860050078072327340444416e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.93359375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.13063845566677402478557805956745594172754985803934e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.56591342761460650504994018321276991271746451691815e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.94140625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.09331197973287229185946564870900349250152411988374e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9453125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.74040079933008774660558150020826896333899909298154e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.94921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 5.54582183342592176371744225018671216049948426069910e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.953125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.56507752301931033036206963972726950780990330962767e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.95703125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.88016814096703401470072036376924072379132872310869e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.9609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 9.61631402738793734570515994292749381948933242893841e1), tolerance); + BOOST_CHECK_CLOSE(quantile(dist, static_cast(0.96484375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 
1.19723988112353634841352379780826428736159712449728e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.96875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52796721097108753422708089760626414214332697170320e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.97265625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.01231735395463433145333990641210588531111270293944e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.9765625)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.76159370958398793120791299241979313515017333512222e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.98046875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.00925470915587723915099888946681205221515647834867e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.984375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.31534559333932129219229424636026382581685146602733e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.98828125)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.13177617125304414676334752220512104772421667697176e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.9921875)), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56685855095635428319020462289713380105568378947326e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, static_cast<RealType>(0.99609375)), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03488985899360430847226178216145298481879311104883e4), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_saspoint5_quantile_lower() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -3)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.64871492892195438064623231330402224448304864505255e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -4)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.56591342761460650504994018321276991271746451691815e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -5)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.52796721097108753422708089760626414214332697170320e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -6)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.31534559333932129219229424636026382581685146602733e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -7)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.56685855095635428319020462289713380105568378947326e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -8)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.03488985899360430847226178216145298481879311104883e4), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -10)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.66560111810464968102166352094329039797553809013180e5), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -12)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.66887306794201818857999976857911823063870328368667e6), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -14)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.27176145418643796488192673589719523675773715325372e7), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -16)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.83544414827242601690013182749435284100680743652971e8), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -20)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.74992376775466783667682507089110333268769521176506e11), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -24)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.47981285598233127388251456426050883440628660913084e13), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -28)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.14683221929996578514587273759806303045535542474016e16), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -32)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.93589050191487067535089237582921756933019831739543e18), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -40)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.92406520022739246958562710299618130085960993793414e23), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -48)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.26095536962330863369654555704556352597738045353834e28), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -56)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.26379711036337395023730103376706642065822106379442e32), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -64)), BOOST_MATH_BIG_CONSTANT(RealType, N, -5.41576207424774090175793327287164715152292673169682e37), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -80)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.32605209918111709774537445734765990902394758994645e47), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -96)), BOOST_MATH_BIG_CONSTANT(RealType, N, -9.99031769477504631556283515692397586421188586846623e56), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -112)), BOOST_MATH_BIG_CONSTANT(RealType, N, -4.29080877757089340022276728331106401823387140732677e66), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -128)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.84288833730567254754590245964397936517161760904520e76), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -160)), BOOST_MATH_BIG_CONSTANT(RealType, N, -3.39952895147018642535639799349517299692324064545541e95), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -192)), BOOST_MATH_BIG_CONSTANT(RealType, N, -6.27102405389367073393026479530690706073270941007595e114), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -224)), BOOST_MATH_BIG_CONSTANT(RealType, N, -1.15679975802253118434756797269580371455350883468455e134), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -256)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.13391890807707704630144658385551537641074743396505e153), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -320)), BOOST_MATH_BIG_CONSTANT(RealType, N, -7.26134976857812288039693249651849278418162312282066e191), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -384)), BOOST_MATH_BIG_CONSTANT(RealType, N, -2.47090928629257240812467048839513197145079590955310e230), tolerance);
+    BOOST_CHECK_CLOSE(quantile(dist, ldexp(static_cast<RealType>(1), -448)), BOOST_MATH_BIG_CONSTANT(RealType, N, -8.40806860386563308738645719151345111937971699513862e268), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_saspoint5_quantile_upper() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist(static_cast<RealType>(0), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -3))), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.64871492892195438064623231330402224448304864505255e0), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -4))), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.56591342761460650504994018321276991271746451691815e1), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -5))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.52796721097108753422708089760626414214332697170320e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -6))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.31534559333932129219229424636026382581685146602733e2), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -7))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.56685855095635428319020462289713380105568378947326e3), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -8))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.03488985899360430847226178216145298481879311104883e4), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -10))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.66560111810464968102166352094329039797553809013180e5), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -12))), BOOST_MATH_BIG_CONSTANT(RealType, N, 2.66887306794201818857999976857911823063870328368667e6), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -14))), BOOST_MATH_BIG_CONSTANT(RealType, N, 4.27176145418643796488192673589719523675773715325372e7), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -16))), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.83544414827242601690013182749435284100680743652971e8), tolerance);
+    BOOST_CHECK_CLOSE(quantile(complement(dist, ldexp(static_cast<RealType>(1), -20))), BOOST_MATH_BIG_CONSTANT(RealType, N, 1.74992376775466783667682507089110333268769521176506e11), tolerance);
+}
+
+template <class RealType, int N>
+void do_test_saspoint5_locscale_param() {
+    //
+    // Basic sanity checks, tolerance is 3 epsilon
+    // expressed as a percentage:
+    //
+
+    BOOST_MATH_STD_USING
+    RealType tolerance = boost::math::tools::epsilon<RealType>() * 100 * 3;
+
+    std::cout << "Testing accuracy[%]: " << tolerance << std::endl;
+
+    saspoint5_distribution<RealType> dist_0_1(static_cast<RealType>(0), static_cast<RealType>(1));
+    saspoint5_distribution<RealType> dist_1_3(static_cast<RealType>(1), static_cast<RealType>(3));
+
+    BOOST_CHECK_CLOSE(entropy(dist_0_1), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.6399244456803064957308496039071853510), tolerance);
+    BOOST_CHECK_CLOSE(entropy(dist_1_3), BOOST_MATH_BIG_CONSTANT(RealType, N, 3.6399244456803064957308496039071853510) + log(static_cast<RealType>(3)), tolerance);
+
+    BOOST_CHECK_EQUAL(median(dist_0_1), static_cast<RealType>(0));
+    BOOST_CHECK_EQUAL(median(dist_1_3), static_cast<RealType>(1));
+
+    BOOST_CHECK_EQUAL(mode(dist_0_1), static_cast<RealType>(0));
+    BOOST_CHECK_EQUAL(mode(dist_1_3), static_cast<RealType>(1));
+
+    BOOST_CHECK_CLOSE(pdf(dist_0_1, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 8.61071469126041183247373313827161939453635781053656e-2), tolerance);
+    BOOST_CHECK_CLOSE(pdf(dist_1_3, static_cast<RealType>(1)), BOOST_MATH_BIG_CONSTANT(RealType, N, 6.36619772367581343075535053490057448137838582961826e-1) / 3, tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, static_cast<RealType>(2)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.86071837724616281594369593572177037450866923839017e-1), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, static_cast<RealType>(7)), BOOST_MATH_BIG_CONSTANT(RealType, N, 7.86071837724616281594369593572177037450866923839017e-1), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.25))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.25), tolerance);
+
+    BOOST_CHECK_CLOSE(cdf(dist_0_1, quantile(dist_0_1, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+    BOOST_CHECK_CLOSE(cdf(dist_1_3, quantile(dist_1_3, static_cast<RealType>(0.75))), BOOST_MATH_BIG_CONSTANT(RealType, N, 0.75), tolerance);
+}
+
+BOOST_AUTO_TEST_CASE(saspoint5_pdf_fp64)
+{
+    do_test_saspoint5_pdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_pdf_std64)
+{
+    do_test_saspoint5_pdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_pdf_fp128)
+{
+    do_test_saspoint5_pdf<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(saspoint5_cdf_fp64)
+{
+    do_test_saspoint5_cdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_cdf_std64)
+{
+    do_test_saspoint5_cdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_cdf_fp128)
+{
+    do_test_saspoint5_cdf<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(saspoint5_ccdf_fp64)
+{
+    do_test_saspoint5_ccdf<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_ccdf_std64)
+{
+    do_test_saspoint5_ccdf<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_ccdf_fp128)
+{
+    do_test_saspoint5_ccdf<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_nearzero_fp64)
+{
+    do_test_saspoint5_quantile_nearzero<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_nearzero_std64)
+{
+    do_test_saspoint5_quantile_nearzero<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_nearzero_fp128)
+{
+    do_test_saspoint5_quantile_nearzero<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_lower_fp64)
+{
+    do_test_saspoint5_quantile_lower<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_lower_std64)
+{
+    do_test_saspoint5_quantile_lower<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_lower_fp128)
+{
+    do_test_saspoint5_quantile_lower<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_upper_fp64)
+{
+    do_test_saspoint5_quantile_upper<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_upper_std64)
+{
+    do_test_saspoint5_quantile_upper<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_quantile_upper_fp128)
+{
+    do_test_saspoint5_quantile_upper<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(saspoint5_locscale_fp64)
+{
+    do_test_saspoint5_locscale_param<double, 53>();
+}
+
+#ifdef __STDCPP_FLOAT64_T__
+BOOST_AUTO_TEST_CASE(saspoint5_locscale_std64)
+{
+    do_test_saspoint5_locscale_param<std::float64_t, 53>();
+}
+#endif
+
+#ifndef BOOST_MATH_HAS_GPU_SUPPORT
+BOOST_AUTO_TEST_CASE(saspoint5_locscale_fp128)
+{
+    do_test_saspoint5_locscale_param<boost::multiprecision::cpp_bin_float_quad, 113>();
+}
+#endif
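A note on the tolerances used throughout these drivers: BOOST_CHECK_CLOSE takes its third argument as a percentage, which is why every tolerance above is machine epsilon scaled by 100 and a small ulp factor. A minimal standalone sketch of that arithmetic (illustrative only, not part of the patch):

    #include <boost/math/tools/precision.hpp>
    #include <iostream>

    int main()
    {
        // BOOST_CHECK_CLOSE(a, b, tol) passes when a and b agree to within
        // tol percent, so epsilon * 100 * 4 permits roughly 4 ulp of error.
        double eps = boost::math::tools::epsilon<double>(); // 2^-52 for binary64
        std::cout << "4-epsilon tolerance [%]: " << eps * 100 * 4 << '\n';
    }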
diff --git a/test/test_saspoint5_cdf_double.cu b/test/test_saspoint5_cdf_double.cu
new file mode 100644
index 0000000000..fb3e2f74c8
--- /dev/null
+++ b/test/test_saspoint5_cdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::saspoint5_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
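Both .cu tests size their launch with the ceiling-division idiom blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock, so the grid covers every element and the in-kernel bounds check discards the surplus threads. A host-only sketch of the arithmetic (illustrative, no CUDA required):

    #include <cstdio>

    int main()
    {
        int numElements = 50000;
        int threadsPerBlock = 256;
        // Integer ceiling division: (50000 + 255) / 256 = 196 blocks,
        // i.e. 196 * 256 = 50176 threads; the kernel's `if (i < numElements)`
        // guard leaves the extra 176 threads idle.
        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        std::printf("%d blocks of %d threads\n", blocksPerGrid, threadsPerBlock);
    }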
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cdf(boost::math::saspoint5_distribution(), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist(0, 1); + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(cdf(boost::math::saspoint5_distribution(), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_saspoint5_cdf_nvrtc_double.cpp b/test/test_saspoint5_cdf_nvrtc_double.cpp new file mode 100644 index 0000000000..ff2067fa0c --- /dev/null +++ b/test/test_saspoint5_cdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/saspoint5.hpp>
+#include
+extern "C" __global__
+void test_saspoint5_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_saspoint5_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_saspoint5_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_saspoint5_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::saspoint5_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_saspoint5_cdf_nvrtc_float.cpp b/test/test_saspoint5_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..11c816da1c
--- /dev/null
+++ b/test/test_saspoint5_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/saspoint5.hpp>
+#include
+extern "C" __global__
+void test_saspoint5_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_saspoint5_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_saspoint5_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_saspoint5_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::saspoint5_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_saspoint5_pdf_double.cu b/test/test_saspoint5_pdf_double.cu
new file mode 100644
index 0000000000..5392a328bf
--- /dev/null
+++ b/test/test_saspoint5_pdf_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::saspoint5_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_saspoint5_pdf_float.cu b/test/test_saspoint5_pdf_float.cu
new file mode 100644
index 0000000000..01fbcd472b
--- /dev/null
+++ b/test/test_saspoint5_pdf_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::saspoint5_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_saspoint5_pdf_nvrtc_double.cpp b/test/test_saspoint5_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..4c74443836
--- /dev/null
+++ b/test/test_saspoint5_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/saspoint5.hpp>
+#include
+extern "C" __global__
+void test_saspoint5_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_saspoint5_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_saspoint5_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_saspoint5_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::saspoint5_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_saspoint5_pdf_nvrtc_float.cpp b/test/test_saspoint5_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..8cd93aaa94
--- /dev/null
+++ b/test/test_saspoint5_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/saspoint5.hpp>
+#include
+extern "C" __global__
+void test_saspoint5_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_saspoint5_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_saspoint5_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_saspoint5_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::saspoint5_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_saspoint5_quan_double.cu b/test/test_saspoint5_quan_double.cu
new file mode 100644
index 0000000000..7415f06906
--- /dev/null
+++ b/test/test_saspoint5_quan_double.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::saspoint5_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_saspoint5_quan_float.cu b/test/test_saspoint5_quan_float.cu
new file mode 100644
index 0000000000..d6f49084bb
--- /dev/null
+++ b/test/test_saspoint5_quan_float.cu
@@ -0,0 +1,110 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist(0, 1);
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::saspoint5_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_saspoint5_quan_nvrtc_double.cpp b/test/test_saspoint5_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..631ac4b243
--- /dev/null
+++ b/test/test_saspoint5_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/saspoint5.hpp>
+#include
+extern "C" __global__
+void test_saspoint5_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_saspoint5_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_saspoint5_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_saspoint5_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::saspoint5_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_saspoint5_quan_nvrtc_float.cpp b/test/test_saspoint5_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..fa152622e6
--- /dev/null
+++ b/test/test_saspoint5_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/distributions/saspoint5.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/saspoint5.hpp>
+#include
+extern "C" __global__
+void test_saspoint5_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::saspoint5_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_saspoint5_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_saspoint5_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_saspoint5_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::saspoint5_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_sign.cpp b/test/test_sign.cpp
index 864d2dd121..530a60d503 100644
--- a/test/test_sign.cpp
+++ b/test/test_sign.cpp
@@ -1,5 +1,6 @@
-#define BOOST_TEST_MAIN// Copyright John Maddock 2008
+// Copyright John Maddock 2008
 // (C) Copyright Paul A. Bristow 2011 (added tests for changesign)
+// Copyright Matt Borland 2024
 // Use, modification and distribution are subject to the
 // Boost Software License, Version 1.0.
 // (See accompanying file LICENSE_1_0.txt
@@ -147,7 +148,9 @@ BOOST_AUTO_TEST_CASE( test_main )
   test_spots(0.0, "double"); // Test double. OK at decdigits 7, tolerance = 1e07 %
   // long double support for the sign functions is considered "core" so we always test it
   // even when long double support is turned off via BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
+#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
   test_spots(0.0L, "long double"); // Test long double.
+#endif
 #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
   test_spots(boost::math::concepts::real_concept(0), "real_concept"); // Test real_concept.
 #endif
diff --git a/test/test_sign_nvrtc_double.cpp b/test/test_sign_nvrtc_double.cpp
new file mode 100644
index 0000000000..0951f9ef68
--- /dev/null
+++ b/test/test_sign_nvrtc_double.cpp
@@ -0,0 +1,193 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/sign.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/sign.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::signbit(in1[i])
+                 + boost::math::changesign(in1[i])
+                 + boost::math::copysign(in1[i], in2[i])
+                 + boost::math::sign(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::signbit(h_in1[i])
+                       + boost::math::changesign(h_in1[i])
+                       + boost::math::copysign(h_in1[i], h_in2[i])
+                       + boost::math::sign(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_sign_nvrtc_float.cpp b/test/test_sign_nvrtc_float.cpp
new file mode 100644
index 0000000000..6e07f1996a
--- /dev/null
+++ b/test/test_sign_nvrtc_float.cpp
@@ -0,0 +1,193 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
diff --git a/test/test_sign_nvrtc_float.cpp b/test/test_sign_nvrtc_float.cpp
new file mode 100644
index 0000000000..6e07f1996a
--- /dev/null
+++ b/test/test_sign_nvrtc_float.cpp
@@ -0,0 +1,193 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/sign.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/sign.hpp>
+extern "C" __global__
+void test_gamma_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::signbit(in1[i])
+               + boost::math::changesign(in1[i])
+               + boost::math::copysign(in1[i], in2[i])
+               + boost::math::sign(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_gamma_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_gamma_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_gamma_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::signbit(h_in1[i])
+                     + boost::math::changesign(h_in1[i])
+                     + boost::math::copysign(h_in1[i], h_in2[i])
+                     + boost::math::sign(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_sin_pi_double.cu b/test/test_sin_pi_double.cu
new file mode 100644
index 0000000000..0783d55363
--- /dev/null
+++ b/test/test_sin_pi_double.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/sin_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sin_pi(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::sin_pi(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
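Worked example of the launch arithmetic above: blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock is integer ceiling division, so with numElements = 50000 and threadsPerBlock = 1024 it yields (50000 + 1023) / 1024 = 49 blocks, i.e. 49 * 1024 = 50176 threads in total; the if (i < numElements) guard inside the kernel masks off the 176 surplus threads.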
diff --git a/test/test_sin_pi_float.cu b/test/test_sin_pi_float.cu
new file mode 100644
index 0000000000..9a9f075807
--- /dev/null
+++ b/test/test_sin_pi_float.cu
@@ -0,0 +1,100 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <boost/math/special_functions/sin_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sin_pi(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::sin_pi(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_sin_pi_nvrtc_double.cpp b/test/test_sin_pi_nvrtc_double.cpp
new file mode 100644
index 0000000000..b6cff9798b
--- /dev/null
+++ b/test/test_sin_pi_nvrtc_double.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/sin_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/special_functions/sin_pi.hpp>
+extern "C" __global__
+void test_sin_pi_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::sin_pi(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sin_pi_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_sin_pi_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_sin_pi_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::sin_pi(h_in1[i]);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_sin_pi_nvrtc_float.cpp b/test/test_sin_pi_nvrtc_float.cpp
new file mode 100644
index 0000000000..f67079774f
--- /dev/null
+++ b/test/test_sin_pi_nvrtc_float.cpp
@@ -0,0 +1,186 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/special_functions/sin_pi.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/sin_pi.hpp>
+extern "C" __global__
+void test_sin_pi_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::sin_pi(in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sin_pi_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_sin_pi_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_sin_pi_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = boost::math::sin_pi(h_in1[i]);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_sinh_sinh_quad_double.cu b/test/test_sinh_sinh_quad_double.cu
new file mode 100644
index 0000000000..bf7490fa4b
--- /dev/null
+++ b/test/test_sinh_sinh_quad_double.cu
@@ -0,0 +1,133 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <limits>
+#include <boost/math/quadrature/sinh_sinh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+__host__ __device__ float_type func(float_type x)
+{
+    BOOST_MATH_STD_USING
+    return 1/(1+x*x);
+}
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::size_t levels;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::quadrature::sinh_sinh<float_type> integrator;
+    for(int i = 0; i < numElements; ++i)
+    {
+        results.push_back(integrator.integrate(func, tol, &error, &L1));
+    }
+    double t = w.elapsed();
+    // check the results
+    int failed_count = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
+        if (eps > 10)
+        {
+            std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
+                      << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i]
+                      << "\n  Host: " << results[i]
+                      << "\n   Eps: " << eps << "\n";
+            failed_count++;
+        }
+        if (failed_count > 100)
+        {
+            break;
+        }
+    }
+
+    if (failed_count != 0)
+    {
+        std::cout << "Test FAILED" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
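Both sinh-sinh kernels integrate $f(x) = 1/(1+x^2)$ over the whole real line, so every slot of the output vector should converge to the same closed-form value,

\[
\int_{-\infty}^{\infty} \frac{dx}{1+x^{2}} \;=\; \bigl[\arctan x\bigr]_{-\infty}^{\infty} \;=\; \pi,
\]

which the quadrature reaches via the double-exponential substitution $x = \sinh\!\bigl(\tfrac{\pi}{2}\sinh t\bigr)$ that maps $(-\infty, \infty)$ onto itself.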
diff --git a/test/test_sinh_sinh_quad_float.cu b/test/test_sinh_sinh_quad_float.cu
new file mode 100644
index 0000000000..b84e316af9
--- /dev/null
+++ b/test/test_sinh_sinh_quad_float.cu
@@ -0,0 +1,133 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <limits>
+#include <boost/math/quadrature/sinh_sinh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+__host__ __device__ float_type func(float_type x)
+{
+    BOOST_MATH_STD_USING
+    return 1/(1+x*x);
+}
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::size_t levels;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 512;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::quadrature::sinh_sinh<float_type> integrator;
+    for(int i = 0; i < numElements; ++i)
+    {
+        results.push_back(integrator.integrate(func, tol, &error, &L1));
+    }
+    double t = w.elapsed();
+    // check the results
+    int failed_count = 0;
+    for(int i = 0; i < numElements; ++i)
+    {
+        const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
+        if (eps > 10)
+        {
+            std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
+                      << "Result verification failed at element " << i << "!\n"
+                      << "Device: " << output_vector[i]
+                      << "\n  Host: " << results[i]
+                      << "\n   Eps: " << eps << "\n";
+            failed_count++;
+        }
+        if (failed_count > 100)
+        {
+            break;
+        }
+    }
+
+    if (failed_count != 0)
+    {
+        std::cout << "Test FAILED" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_sinh_sinh_quad_nvrtc_double.cpp b/test/test_sinh_sinh_quad_nvrtc_double.cpp
new file mode 100644
index 0000000000..5342e97785
--- /dev/null
+++ b/test/test_sinh_sinh_quad_nvrtc_double.cpp
@@ -0,0 +1,206 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/quadrature/sinh_sinh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/quadrature/sinh_sinh.hpp>
+
+__host__ __device__ float_type func(float_type x)
+{
+    return 1/(1+x*x);
+}
+
+extern "C" __global__
+void test_sinh_sinh_kernel(const float_type*, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::size_t levels;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
+    }
+}
+)";
+
+__host__ __device__ float_type func(float_type x)
+{
+    return 1/(1+x*x);
+}
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sinh_sinh_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_sinh_sinh_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_sinh_sinh_kernel"), "Failed to get kernel function");
+
+        int numElements = 50000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        float_type tol = boost::math::tools::root_epsilon<float_type>();
+        float_type error;
+        float_type L1;
+        boost::math::quadrature::sinh_sinh<float_type> integrator;
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = integrator.integrate(func, tol, &error, &L1);
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_sinh_sinh_quad_nvrtc_float.cpp b/test/test_sinh_sinh_quad_nvrtc_float.cpp
new file mode 100644
index 0000000000..37a8c12525
--- /dev/null
+++ b/test/test_sinh_sinh_quad_nvrtc_float.cpp
@@ -0,0 +1,206 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <random>
+#include <exception>
+#include <boost/math/quadrature/sinh_sinh.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/quadrature/sinh_sinh.hpp>
+
+__host__ __device__ float_type func(float_type x)
+{
+    return 1/(1+x*x);
+}
+
+extern "C" __global__
+void test_sinh_sinh_kernel(const float_type*, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    float_type tol = boost::math::tools::root_epsilon<float_type>();
+    float_type error;
+    float_type L1;
+    boost::math::size_t levels;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::quadrature::sinh_sinh_integrate(func, tol, &error, &L1, &levels);
+    }
+}
+)";
+
+__host__ __device__ float_type func(float_type x)
+{
+    return 1/(1+x*x);
+}
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sinh_sinh_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_sinh_sinh_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sinh_sinh_kernel"), "Failed to get kernel function"); + + int numElements = 50000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + float_type tol = boost::math::tools::root_epsilon(); + float_type error; + float_type L1; + boost::math::quadrature::sinh_sinh integrator; + for (int i = 0; i < numElements; ++i) + { + auto res = integrator.integrate(func, tol, &error, &L1); + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." 
<< std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_skew_normal.cpp b/test/test_skew_normal.cpp index 5f0657fbd1..617a8f6fab 100644 --- a/test/test_skew_normal.cpp +++ b/test/test_skew_normal.cpp @@ -356,7 +356,7 @@ void test_spots(RealType) BOOST_CHECK_CLOSE( // mean: mean(dist) - , static_cast(-0.579908992539856825862549L), tol10 * 2); + , static_cast(-0.5799089925398568258625490172876619L), tol10 * 2); std::cout << std::setprecision(17) << "Variance = " << variance(dist) << std::endl; BOOST_CHECK_CLOSE( // variance: N[variance[skewnormaldistribution[1.1, 2.2, -3.3]], 50] @@ -365,27 +365,27 @@ void test_spots(RealType) BOOST_CHECK_CLOSE( // skewness: skewness(dist) - , static_cast(-0.709854548171537509192897824663L), tol100); + , static_cast(-0.709854548171537509192897824663027155L), tol100); BOOST_CHECK_CLOSE( // kurtosis: kurtosis(dist) - , static_cast(3.5538752625241790601377L), tol100); + , static_cast(3.55387526252417906013770535120683805L), tol100); BOOST_CHECK_CLOSE( // kurtosis excess: kurtosis_excess(dist) - , static_cast(0.5538752625241790601377L), tol100); + , static_cast(0.553875262524179060137705351206838143L), tol100); BOOST_CHECK_CLOSE( pdf(dist, static_cast(0.4L)), - static_cast(0.294140110156599539564571L), + static_cast(0.294140110156599539564571034730246656L), tol10); BOOST_CHECK_CLOSE( cdf(dist, static_cast(0.4L)), - static_cast(0.7339186189278737976326676452L), + static_cast(0.733918618927873797632667645226588243L), tol100); BOOST_CHECK_CLOSE( quantile(dist, static_cast(0.3L)), - static_cast(-1.180104068086875314419247L), + static_cast(-1.18010406808687531441924729956233392L), tol100); @@ -395,72 +395,63 @@ void test_spots(RealType) // cout << "pdf(dist, 0) = " << pdf(dist, 0) << ", pdf(dist, 0.45) = " << pdf(dist, 0.45) << endl; // BOOST_CHECK_CLOSE(mode(dist), boost::math::constants::root_two() / 2, tol5); - BOOST_CHECK_CLOSE(mode(dist), static_cast(0.41697299497388863932L), tol100); + BOOST_CHECK_CLOSE(mode(dist), static_cast(0.416972994973888639318345129445233074L), tol100); } } - if(std::numeric_limits< RealType>::digits && (std::numeric_limits::digits < 100)) - { - dist = skew_normal_distribution(static_cast(1.1l), static_cast(0.02l), static_cast(0.03l)); + dist = skew_normal_distribution(static_cast(1.1l), static_cast(0.02l), static_cast(0.03l)); - BOOST_CHECK_CLOSE( // mean: + BOOST_CHECK_CLOSE( // mean: mean(dist) - , static_cast(1.1004785154529557886162L), tol10); - BOOST_CHECK_CLOSE( // variance: + , static_cast(1.1004785154529557886162056250600829L), tol10); + BOOST_CHECK_CLOSE( // variance: variance(dist) - , static_cast(0.00039977102296128251645L), tol10); + , static_cast(0.000399771022961282516451686289719995601L), tol10); - BOOST_CHECK_CLOSE( // skewness: + BOOST_CHECK_CLOSE( // skewness: skewness(dist) - , static_cast(5.8834811259890359782e-006L), tol100); - BOOST_CHECK_CLOSE( // kurtosis: + , static_cast(5.88348112598903597820852388986073439e-006L), tol100); + BOOST_CHECK_CLOSE( // kurtosis: kurtosis(dist) - , static_cast(3.L + 9.2903475812137800239002e-008L), tol100); - BOOST_CHECK_CLOSE( // kurtosis excess: + , static_cast(3.L + 9.290347581213780023900209941e-008L), tol100); + BOOST_CHECK_CLOSE( // kurtosis excess: kurtosis_excess(dist) - , static_cast(9.2903475812137800239002e-008L), tol100); - } - if (std::numeric_limits< RealType>::digits && (std::numeric_limits::digits < 100)) - { - dist = 
-    dist = skew_normal_distribution<RealType>(static_cast<RealType>(10.1l), static_cast<RealType>(5.l), static_cast<RealType>(-0.03l));
-    BOOST_CHECK_CLOSE( // mean:
+     , static_cast<RealType>(9.29034758121378002390020993765449518e-008L), tol100);
+  dist = skew_normal_distribution<RealType>(static_cast<RealType>(10.1l), static_cast<RealType>(5.l), static_cast<RealType>(-0.03l));
+  BOOST_CHECK_CLOSE( // mean:
      mean(dist)
-     , static_cast<RealType>(9.9803711367610528459485937L), tol10);
-    BOOST_CHECK_CLOSE( // variance:
+     , static_cast<RealType>(9.98037113676105284594859373497928476L), tol10);
+  BOOST_CHECK_CLOSE( // variance:
      variance(dist)
-     , static_cast<RealType>(24.98568893508015727823L), tol10);
+     , static_cast<RealType>(24.9856889350801572782303931074997234L), tol10);
-    BOOST_CHECK_CLOSE( // skewness:
+  BOOST_CHECK_CLOSE( // skewness:
      skewness(dist)
-     , static_cast<RealType>(-5.8834811259890359782085e-006L), tol100);
-    BOOST_CHECK_CLOSE( // kurtosis:
+     , static_cast<RealType>(-5.88348112598903597820852388986073439e-006L), tol100);
+  BOOST_CHECK_CLOSE( // kurtosis:
      kurtosis(dist)
-     , static_cast<RealType>(3.L + 9.2903475812137800239002e-008L), tol100);
-    BOOST_CHECK_CLOSE( // kurtosis excess:
+     , static_cast<RealType>(3.L + 9.290347581213780023900209941e-008L), tol100);
+  BOOST_CHECK_CLOSE( // kurtosis excess:
      kurtosis_excess(dist)
-     , static_cast<RealType>(9.2903475812137800239002e-008L), tol100);
-  }
-  if (std::numeric_limits< RealType>::digits && (std::numeric_limits<RealType>::digits < 100))
-  {
-    dist = skew_normal_distribution<RealType>(static_cast<RealType>(-10.1l), static_cast<RealType>(5.l), static_cast<RealType>(30.l));
-    BOOST_CHECK_CLOSE( // mean:
+     , static_cast<RealType>(9.29034758121378002390020993765449518e-008L), tol100);
+  dist = skew_normal_distribution<RealType>(static_cast<RealType>(-10.1l), static_cast<RealType>(5.l), static_cast<RealType>(30.l));
+  BOOST_CHECK_CLOSE( // mean:
      mean(dist)
-     , static_cast<RealType>(-6.11279169674138408531365L), 2 * tol10);
-    BOOST_CHECK_CLOSE( // variance:
+     , static_cast<RealType>(-6.11279169674138408531365149047090859L), 2 * tol10);
+  BOOST_CHECK_CLOSE( // variance:
      variance(dist)
-     , static_cast<RealType>(9.10216994642554914628242L), tol10 * 2);
+     , static_cast<RealType>(9.10216994642554914628242097277880642L), tol10 * 2);
-    BOOST_CHECK_CLOSE( // skewness:
+  BOOST_CHECK_CLOSE( // skewness:
      skewness(dist)
-     , static_cast<RealType>(0.99072425443686904424L), tol100);
-    BOOST_CHECK_CLOSE( // kurtosis:
+     , static_cast<RealType>(0.990724254436869044244695246354219556L), tol100);
+  BOOST_CHECK_CLOSE( // kurtosis:
      kurtosis(dist)
-     , static_cast<RealType>(3.L + 0.8638862008406084244563L), tol100);
-    BOOST_CHECK_CLOSE( // kurtosis excess:
+     , static_cast<RealType>(3.L + 0.8638862008406084244563090239530549L), tol100);
+  BOOST_CHECK_CLOSE( // kurtosis excess:
      kurtosis_excess(dist)
-     , static_cast<RealType>(0.8638862008406084244563L), tol100);
-  }
+     , static_cast<RealType>(0.863886200840608424456309023953054896L), tol100);
   BOOST_MATH_CHECK_THROW(cdf(skew_normal_distribution<RealType>(0, 0, 0), 0), std::domain_error);
   BOOST_MATH_CHECK_THROW(cdf(skew_normal_distribution<RealType>(0, -1, 0), 0), std::domain_error);
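The refreshed skew-normal reference values above can be sanity-checked against the closed-form mean of the distribution with location $\xi$, scale $\omega$ and shape $\alpha$:

\[
\delta = \frac{\alpha}{\sqrt{1+\alpha^{2}}}, \qquad
\operatorname{E}[X] = \xi + \omega\,\delta\,\sqrt{2/\pi}.
\]

For $(\xi, \omega, \alpha) = (1.1,\ 2.2,\ -3.3)$ this gives $\delta \approx -0.957024$ and $\operatorname{E}[X] \approx 1.1 - 2.2 \cdot 0.957024 \cdot 0.797885 \approx -0.57990899$, matching the leading digits of the new higher-precision constant.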
diff --git a/test/test_sph_bessel_double.cu b/test/test_sph_bessel_double.cu
new file mode 100644
index 0000000000..5229dd8b5e
--- /dev/null
+++ b/test/test_sph_bessel_double.cu
@@ -0,0 +1,119 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <random>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const unsigned *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sph_bessel(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<unsigned> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    std::mt19937_64 rng {42};
+    std::uniform_int_distribution<unsigned> order(1, 100);
+    std::uniform_real_distribution<float_type> val(0, 100);
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = order(rng);
+        input_vector2[i] = val(rng);
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::sph_bessel(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    bool failed = false;
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(output_vector[i]) && std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 3000)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << results[i]
+                          << "\n  Serial: " << output_vector[i]
+                          << "\n    Dist: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                failed = true;
+            }
+        }
+    }
+
+    if (failed)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
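boost::math::sph_bessel computes the spherical Bessel function of the first kind, related to the cylindrical $J_\nu$ by

\[
j_n(x) = \sqrt{\frac{\pi}{2x}}\; J_{n+1/2}(x),
\]

an oscillatory function whose near-zero crossings presumably motivate the looser 3000-epsilon tolerance here, compared with the 10-epsilon gate in the sin_pi tests.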
diff --git a/test/test_sph_bessel_float.cu b/test/test_sph_bessel_float.cu
new file mode 100644
index 0000000000..bd068a1a01
--- /dev/null
+++ b/test/test_sph_bessel_float.cu
@@ -0,0 +1,119 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <vector>
+#include <random>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const unsigned *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sph_bessel(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<unsigned> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    std::mt19937_64 rng {42};
+    std::uniform_int_distribution<unsigned> order(1, 100);
+    std::uniform_real_distribution<float_type> val(0, 100);
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = order(rng);
+        input_vector2[i] = val(rng);
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::sph_bessel(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    bool failed = false;
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(output_vector[i]) && std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 150)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << results[i]
+                          << "\n  Serial: " << output_vector[i]
+                          << "\n    Dist: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                failed = true;
+            }
+        }
+    }
+
+    if (failed)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_cyl_bessel_j_kernel(const unsigned *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_bessel(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_j_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cyl_bessel_j_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_j_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + unsigned *h_in1, *d_in1; + float_type *h_in2, *h_out; + float_type *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new unsigned[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_int_distribution order(1, 100); + std::uniform_real_distribution val(0.0f, 100.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(order(rng)); + h_in2[i] = static_cast(val(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(unsigned)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(unsigned), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + bool failed = false; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_bessel(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 3000) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + failed = true; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (failed) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_bessel_nvrtc_float.cpp b/test/test_sph_bessel_nvrtc_float.cpp new file mode 100644 index 0000000000..c9538cd5bf --- /dev/null +++ b/test/test_sph_bessel_nvrtc_float.cpp @@ -0,0 +1,199 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
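The bare #include directives in the NVRTC drivers lost their targets during extraction. Judging from the APIs actually called (cuInit/cuLaunchKernel, cudaMalloc/cudaMemcpy, nvrtc*, std::mt19937_64, std::cerr), the intended set is most plausibly the following; which three headers the "Must be included first" comment refers to is not recoverable from the diff, so treat this as an inference:

    #include <cuda.h>          // driver API: cuInit, cuModuleLoadDataEx, cuLaunchKernel
    #include <cuda_runtime.h>  // runtime API: cudaMalloc, cudaMemcpy, cudaFree
    #include <nvrtc.h>         // runtime compilation: nvrtcCreateProgram, nvrtcCompileProgram

    #include <boost/math/special_functions/bessel.hpp>              // host-side sph_bessel reference
    #include <boost/math/special_functions/relative_difference.hpp> // epsilon_difference

    #include <iostream>
    #include <random>
    #include <exception>
    #include <cstdlib>
    #include <cmath>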
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_cyl_bessel_j_kernel(const unsigned *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_bessel(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_cyl_bessel_j_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_cyl_bessel_j_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_cyl_bessel_j_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + unsigned *h_in1, *d_in1; + float_type *h_in2, *h_out; + float_type *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new unsigned[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_int_distribution order(1, 100); + std::uniform_real_distribution val(0.0f, 100.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(order(rng)); + h_in2[i] = static_cast(val(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(unsigned)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(unsigned), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + bool failed = false; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_bessel(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 3000) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + failed = true; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (failed) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_hankel_1_double.cu b/test/test_sph_hankel_1_double.cu new file mode 100644 index 0000000000..ea9ec23063 --- /dev/null +++ b/test/test_sph_hankel_1_double.cu @@ -0,0 +1,119 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
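The sph_hankel tests that follow return complex values, and their kernels write boost::math::complex rather than std::complex: std::complex's member functions are not __device__-qualified, so Boost.Math supplies its own GPU-usable complex type for these kernels. With the extraction-stripped template arguments restored by inference, the kernel signature is presumably:

    __global__ void cuda_test(const float_type *in1, const float_type *in2,
                              boost::math::complex<float_type> *out, int numElements)
    {
        int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < numElements)
        {
            out[i] = boost::math::sph_hankel_1(in1[i], in2[i]);
        }
    }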
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sph_hankel_1(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results[i] = boost::math::sph_hankel_1(input_vector1[i], input_vector2[i]); + double t = w.elapsed(); + // check the results + int failure_counter = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real()); + if (eps > 10) + { + std::cerr << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag() + << "\n Host: " << results[i].real() << ", " << results[i].imag() + << "\n Eps: " << eps << std::endl; + ++failure_counter; + if (failure_counter > 100) + { + break; + } + } + } + + if (failure_counter > 0) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sph_hankel_1_float.cu b/test/test_sph_hankel_1_float.cu new file mode 100644 index 0000000000..4b01fe02a5 --- /dev/null +++ b/test/test_sph_hankel_1_float.cu @@ -0,0 +1,119 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sph_hankel_1(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results[i] = boost::math::sph_hankel_1(input_vector1[i], input_vector2[i]); + double t = w.elapsed(); + // check the results + int failure_counter = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real()); + if (eps > 10) + { + std::cerr << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag() + << "\n Host: " << results[i].real() << ", " << results[i].imag() + << "\n Eps: " << eps << std::endl; + ++failure_counter; + if (failure_counter > 100) + { + break; + } + } + } + + if (failure_counter > 0) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sph_hankel_1_nvrtc_double.cpp b/test/test_sph_hankel_1_nvrtc_double.cpp new file mode 100644 index 0000000000..3ff1da8b20 --- /dev/null +++ b/test/test_sph_hankel_1_nvrtc_double.cpp @@ -0,0 +1,199 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
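One wrinkle in the two sph_hankel_1 .cu tests above (and in the sph_hankel_2 pair below): the host reference vector is only reserve()d and then written through results[i], which never grows the vector and is undefined behavior. A hedged sketch of the verification pass with that fixed, the ULP threshold left as a parameter since this diff uses anything from 10 to 5000 depending on the function:

    #include <vector>
    #include <boost/math/special_functions/hankel.hpp>
    #include <boost/math/special_functions/relative_difference.hpp>

    template <class T>
    int verify_sph_hankel_1(const T* in1, const T* in2,
                            const boost::math::complex<T>* device, int n, T ulp_limit)
    {
        // Sized up front (not reserve()d): indexed writes need the elements to exist.
        std::vector<boost::math::complex<T>> host(n);
        for (int i = 0; i < n; ++i)
            host[i] = boost::math::sph_hankel_1(in1[i], in2[i]);

        int failures = 0;
        for (int i = 0; i < n; ++i)
            if (boost::math::epsilon_difference(host[i].real(), device[i].real()) > ulp_limit)
                ++failures;
        return failures;
    }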
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_sph_hankel_1_kernel(const float_type *in1, const float_type* in2, boost::math::complex *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_hankel_1(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sph_hankel_1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_sph_hankel_1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sph_hankel_1_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2; + float_type *d_in1, *d_in2; + boost::math::complex *h_out, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new boost::math::complex[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + int fail_counter = 0; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_hankel_1(h_in1[i], h_in2[i]); + if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag() + << "\n Serial: " << res.real() << ", " << res.imag() + << "\n Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl; + ++fail_counter; + if (fail_counter > 100) + { + break; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (fail_counter > 0) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_hankel_1_nvrtc_float.cpp b/test/test_sph_hankel_1_nvrtc_float.cpp new file mode 100644 index 0000000000..0b07966537 --- /dev/null +++ b/test/test_sph_hankel_1_nvrtc_float.cpp @@ -0,0 +1,199 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_sph_hankel_1_kernel(const float_type *in1, const float_type* in2, boost::math::complex *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_hankel_1(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sph_hankel_1_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_sph_hankel_1_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sph_hankel_1_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2; + float_type *d_in1, *d_in2; + boost::math::complex *h_out, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new boost::math::complex[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + int fail_counter = 0; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_hankel_1(h_in1[i], h_in2[i]); + if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag() + << "\n Serial: " << res.real() << ", " << res.imag() + << "\n Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl; + ++fail_counter; + if (fail_counter > 100) + { + break; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (fail_counter > 0) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_hankel_2_double.cu b/test/test_sph_hankel_2_double.cu new file mode 100644 index 0000000000..6631f73a02 --- /dev/null +++ b/test/test_sph_hankel_2_double.cu @@ -0,0 +1,119 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
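All of these tests size their launches the same way, and the arithmetic deserves spelling out once: (numElements + blockSize - 1) / blockSize is a ceiling division, so the grid always covers every element, and the i < numElements guard inside each kernel masks off the slack threads in the final block. For the NVRTC drivers' 5000 elements:

    int numElements = 5000;
    int blockSize   = 256;
    int numBlocks   = (numElements + blockSize - 1) / blockSize; // (5000 + 255) / 256 == 20
    // 20 blocks * 256 threads = 5120 threads; the last 120 have
    // i >= numElements and fall through the bounds check doing no work.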
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sph_hankel_2(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results[i] = boost::math::sph_hankel_2(input_vector1[i], input_vector2[i]); + double t = w.elapsed(); + // check the results + int failure_counter = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real()); + if (eps > 10) + { + std::cerr << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag() + << "\n Host: " << results[i].real() << ", " << results[i].imag() + << "\n Eps: " << eps << std::endl; + ++failure_counter; + if (failure_counter > 100) + { + break; + } + } + } + + if (failure_counter > 0) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sph_hankel_2_float.cu b/test/test_sph_hankel_2_float.cu new file mode 100644 index 0000000000..1910aef045 --- /dev/null +++ b/test/test_sph_hankel_2_float.cu @@ -0,0 +1,119 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, boost::math::complex *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sph_hankel_2(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results[i] = boost::math::sph_hankel_2(input_vector1[i], input_vector2[i]); + double t = w.elapsed(); + // check the results + int failure_counter = 0; + for(int i = 0; i < numElements; ++i) + { + const auto eps = boost::math::epsilon_difference(output_vector[i].real(), results[i].real()); + if (eps > 10) + { + std::cerr << "Result verification failed at element " << i << "!\n" + << "Device: " << output_vector[i].real() << ", " << output_vector[i].imag() + << "\n Host: " << results[i].real() << ", " << results[i].imag() + << "\n Eps: " << eps << std::endl; + ++failure_counter; + if (failure_counter > 100) + { + break; + } + } + } + + if (failure_counter > 0) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sph_hankel_2_nvrtc_double.cpp b/test/test_sph_hankel_2_nvrtc_double.cpp new file mode 100644 index 0000000000..fa57fcbb16 --- /dev/null +++ b/test/test_sph_hankel_2_nvrtc_double.cpp @@ -0,0 +1,199 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
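A driver-API detail shared by every NVRTC test here: cuLaunchKernel receives an array of pointers to the kernel arguments, not the argument values themselves, which is why args stores the addresses of the device pointers and of numElements. Condensed from the code in these drivers:

    // Entries must line up with the NVRTC-compiled kernel's parameter list.
    void* args[] = { &d_in1, &d_in2, &d_out, &numElements };

    checkCUError(cuLaunchKernel(kernel,
                                numBlocks, 1, 1,   // grid dimensions
                                blockSize, 1, 1,   // block dimensions
                                0, nullptr,        // no dynamic shared memory, default stream
                                args, nullptr),
                 "Kernel launch failed");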
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_sph_hankel_2_kernel(const float_type *in1, const float_type* in2, boost::math::complex *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_hankel_2(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sph_hankel_2_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_sph_hankel_2_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sph_hankel_2_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2; + float_type *d_in1, *d_in2; + boost::math::complex *h_out, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new boost::math::complex[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + int fail_counter = 0; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_hankel_2(h_in1[i], h_in2[i]); + if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag() + << "\n Serial: " << res.real() << ", " << res.imag() + << "\n Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl; + ++fail_counter; + if (fail_counter > 100) + { + break; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (fail_counter > 0) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_hankel_2_nvrtc_float.cpp b/test/test_sph_hankel_2_nvrtc_float.cpp new file mode 100644 index 0000000000..be6fd0d097 --- /dev/null +++ b/test/test_sph_hankel_2_nvrtc_float.cpp @@ -0,0 +1,199 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_sph_hankel_2_kernel(const float_type *in1, const float_type* in2, boost::math::complex *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_hankel_2(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sph_hankel_2_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_sph_hankel_2_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sph_hankel_2_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2; + float_type *d_in1, *d_in2; + boost::math::complex *h_out, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new boost::math::complex[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(boost::math::complex)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(boost::math::complex), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + int fail_counter = 0; + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_hankel_2(h_in1[i], h_in2[i]); + if (boost::math::epsilon_difference(res.real(), h_out[i].real()) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i].real() << ", " << h_out[i].imag() + << "\n Serial: " << res.real() << ", " << res.imag() + << "\n Dist: " << boost::math::epsilon_difference(res.real(), h_out[i].real()) << std::endl; + ++fail_counter; + if (fail_counter > 100) + { + break; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + if (fail_counter > 0) + { + return 1; + } + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_neumann_double.cu b/test/test_sph_neumann_double.cu new file mode 100644 index 0000000000..f59dc7acca --- /dev/null +++ b/test/test_sph_neumann_double.cu @@ -0,0 +1,116 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
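The watch timer used by the .cu tests comes from the local "stopwatch.hpp", also not included in this diff. A minimal std::chrono equivalent with the same surface (reset(), and elapsed() in seconds) might look like this, purely as a guess at the helper's shape:

    // Hypothetical stand-in for the test suite's stopwatch.hpp.
    #include <chrono>

    class watch
    {
        std::chrono::steady_clock::time_point start_ = std::chrono::steady_clock::now();
    public:
        void reset() { start_ = std::chrono::steady_clock::now(); }
        double elapsed() const // seconds
        {
            return std::chrono::duration<double>(std::chrono::steady_clock::now() - start_).count();
        }
    };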
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <cmath>
+#include <cstdlib>
+#include <vector>
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::sph_neumann(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(boost::math::sph_neumann(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    bool failed = false;
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(output_vector[i]) && std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000)
+            {
+                std::cout << "error at line: " << i
+                          << "\nParallel: " << output_vector[i]
+                          << "\n  Serial: " << results[i]
+                          << "\n    Dist: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl;
+                failed = true;
+            }
+        }
+    }
+
+    if (failed)
+    {
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_sph_neumann_float.cu b/test/test_sph_neumann_float.cu
new file mode 100644
index 0000000000..a295e376f6
--- /dev/null
+++ b/test/test_sph_neumann_float.cu
@@ -0,0 +1,116 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::sph_neumann(in1[i], in2[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed input vector B + cuda_managed_ptr input_vector2(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = rand()/(float_type)RAND_MAX; + input_vector2[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::sph_neumann(input_vector1[i], input_vector2[i])); + double t = w.elapsed(); + bool failed = false; + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(output_vector[i]) && std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 5000) + { + std::cout << "error at line: " << i + << "\nParallel: " << results[i] + << "\n Serial: " << output_vector[i] + << "\n Dist: " << boost::math::epsilon_difference(output_vector[i], results[i]) << std::endl; + failed = true; + } + } + } + + if (failed) + { + return EXIT_FAILURE; + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_sph_neumann_nvrtc_double.cpp b/test/test_sph_neumann_nvrtc_double.cpp new file mode 100644 index 0000000000..61dcb07ddc --- /dev/null +++ b/test/test_sph_neumann_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
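Worth noting about the launch-checking idiom the .cu tests share: a <<<...>>> launch is asynchronous and reports nothing by itself, so the tests first call cudaDeviceSynchronize() (which also makes the stopwatch reading meaningful) and only then ask cudaGetLastError() for the verdict:

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(in1, in2, out, numElements);
    cudaDeviceSynchronize();              // wait for completion; fences the timer
    cudaError_t err = cudaGetLastError(); // reports launch/execution failures
    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch CUDA kernel (error code "
                  << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }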
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_sph_neumann_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::sph_neumann(in1[i], in2[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sph_neumann_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_sph_neumann_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_sph_neumann_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); 
+ h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::sph_neumann(h_in1[i], h_in2[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_sph_neumann_nvrtc_float.cpp b/test/test_sph_neumann_nvrtc_float.cpp new file mode 100644 index 0000000000..5d7ae59fee --- /dev/null +++ b/test/test_sph_neumann_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
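Unlike the sph_bessel NVRTC drivers, this sph_neumann driver (and, from the shared boilerplate, apparently its float twin below) prints mismatches but never records them, so it exits 0 even when verification fails. A sketch of the loop with the same failed flag the other drivers use:

    bool failed = false;
    for (int i = 0; i < numElements; ++i)
    {
        const auto res = boost::math::sph_neumann(h_in1[i], h_in2[i]);
        if (std::isfinite(res) && boost::math::epsilon_difference(res, h_out[i]) > 300)
        {
            std::cout << "error at line: " << i
                      << "\nParallel: " << h_out[i]
                      << "\n  Serial: " << res
                      << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
            failed = true;
        }
    }
    // ...after freeing the device and host buffers:
    if (failed)
    {
        return 1;
    }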
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/special_functions/bessel.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_sph_neumann_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::sph_neumann(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_sph_neumann_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_sph_neumann_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_sph_neumann_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::sph_neumann(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_students_t.cpp b/test/test_students_t.cpp
index ad4dc4187b..b6aec11f44 100644
--- a/test/test_students_t.cpp
+++ b/test/test_students_t.cpp
@@ -18,13 +18,19 @@
 #  pragma warning (disable :4127) // conditional expression is constant.
 #endif
 
+#include <boost/math/tools/config.hpp>
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/tools/traits.hpp> // for has_denorm_now
+#include "../include_private/boost/math/tools/test.hpp"
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
-#include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #include "test_out_of_range.hpp"
 #include <boost/math/distributions/students_t.hpp>
 using boost::math::students_t_distribution;
@@ -35,6 +41,7 @@ using std::setprecision;
 #include <limits>
 using std::numeric_limits;
+#include <type_traits>
 
 template <class RealType>
 RealType naive_pdf(RealType v, RealType t)
@@ -528,7 +535,10 @@ void test_spots(RealType)
   std::string type = typeid(RealType).name();
//   if (type != "class boost::math::concepts::real_concept") fails for gcc
-  if (typeid(RealType) != typeid(boost::math::concepts::real_concept))
+
+  #ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
+  BOOST_MATH_IF_CONSTEXPR(!std::is_same<RealType, boost::math::concepts::real_concept>::value)
+  #endif
   { // Ordinary floats only.
     RealType limit = 1/ boost::math::tools::epsilon<RealType>(); // Default policy to get full accuracy.
diff --git a/test/test_students_t_cdf_double.cu b/test/test_students_t_cdf_double.cu
new file mode 100644
index 0000000000..e8f47faa25
--- /dev/null
+++ b/test/test_students_t_cdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::students_t_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_students_t_cdf_float.cu b/test/test_students_t_cdf_float.cu
new file mode 100644
index 0000000000..22fd5d7c01
--- /dev/null
+++ b/test/test_students_t_cdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::students_t_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_students_t_cdf_nvrtc_double.cpp b/test/test_students_t_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..c88bdd18d5
--- /dev/null
+++ b/test/test_students_t_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_students_t_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_students_t_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_students_t_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_students_t_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::students_t_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_students_t_cdf_nvrtc_float.cpp b/test/test_students_t_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..b5c1b37229
--- /dev/null
+++ b/test/test_students_t_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_students_t_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_students_t_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_students_t_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_students_t_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::students_t_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_students_t_pdf_double.cu b/test/test_students_t_pdf_double.cu
new file mode 100644
index 0000000000..187f63ff52
--- /dev/null
+++ b/test/test_students_t_pdf_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::students_t_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_students_t_pdf_float.cu b/test/test_students_t_pdf_float.cu
new file mode 100644
index 0000000000..ba0469b0e3
--- /dev/null
+++ b/test/test_students_t_pdf_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::students_t_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_students_t_pdf_nvrtc_double.cpp b/test/test_students_t_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..e67eb382a0
--- /dev/null
+++ b/test/test_students_t_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_students_t_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_students_t_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_students_t_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_students_t_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::students_t_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_students_t_pdf_nvrtc_float.cpp b/test/test_students_t_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..e0c9556840
--- /dev/null
+++ b/test/test_students_t_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_students_t_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_students_t_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_students_t_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_students_t_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::students_t_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_students_t_quan_double.cu b/test/test_students_t_quan_double.cu
new file mode 100644
index 0000000000..fe6d999528
--- /dev/null
+++ b/test/test_students_t_quan_double.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::students_t_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_students_t_quan_float.cu b/test/test_students_t_quan_float.cu
new file mode 100644
index 0000000000..6293ec3f83
--- /dev/null
+++ b/test/test_students_t_quan_float.cu
@@ -0,0 +1,109 @@
+// Copyright John Maddock 2016.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+  try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::students_t_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+  }
+  catch(const std::exception& e)
+  {
+      std::cerr << "Stopped with exception: " << e.what() << std::endl;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/test/test_students_t_quan_nvrtc_double.cpp b/test/test_students_t_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..e5b5f60f38
--- /dev/null
+++ b/test/test_students_t_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <boost/math/distributions/students_t.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+extern "C" __global__
+void test_students_t_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::students_t_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_students_t_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_students_t_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_students_t_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::students_t_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_students_t_quan_nvrtc_float.cpp b/test/test_students_t_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..db41964a93
--- /dev/null
+++ b/test/test_students_t_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_students_t_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::students_t_distribution(1), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_students_t_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_students_t_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_students_t_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast(dist(rng)); + h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::students_t_distribution(1), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_tgamma_double.cu b/test/test_tgamma_double.cu new file mode 100644 index 0000000000..6e4140ab6e --- /dev/null +++ b/test/test_tgamma_double.cu @@ -0,0 +1,102 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
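A note on the `> 300` check used by the verification loops in these NVRTC tests: boost::math::epsilon_difference reports the distance between two values in multiples of machine epsilon, so the loop accepts device results within a few hundred eps of the serial Boost.Math result. A minimal host-only sketch of the same comparison (the inputs and the 300-eps bound here are illustrative, not taken from the tests above):

    #include <boost/math/distributions/students_t.hpp>
    #include <boost/math/special_functions/relative_difference.hpp>
    #include <iostream>

    int main()
    {
        using boost::math::students_t_distribution;
        const float p = 0.25f;
        // float result versus a higher-precision double reference
        const float  q     = quantile(students_t_distribution<float>(1), p);
        const double q_ref = quantile(students_t_distribution<double>(1), static_cast<double>(p));
        const float diff = boost::math::epsilon_difference(q, static_cast<float>(q_ref));
        std::cout << diff << " eps " << (diff > 300 ? "(reject)" : "(accept)") << '\n';
    }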
diff --git a/test/test_tgamma_double.cu b/test/test_tgamma_double.cu
new file mode 100644
index 0000000000..6e4140ab6e
--- /dev/null
+++ b/test/test_tgamma_double.cu
@@ -0,0 +1,102 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::tgamma(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::tgamma(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
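These runtime-API tests all allocate through "cuda_managed_ptr.hpp", which is not part of this diff. For readers following along, a minimal sketch of what such a wrapper plausibly provides, assuming cudaMallocManaged-backed storage (this is a hypothetical stand-in, not the repository's actual header):

    #include <cuda_runtime.h>
    #include <cstddef>

    // Hypothetical RAII wrapper over CUDA unified memory: the same pointer
    // is dereferenceable on the host and passable to kernels via get().
    template <typename T>
    class managed_array_sketch
    {
        T* ptr_ = nullptr;
    public:
        explicit managed_array_sketch(std::size_t n)
        {
            cudaMallocManaged(&ptr_, n * sizeof(T));
        }
        ~managed_array_sketch() { cudaFree(ptr_); }
        managed_array_sketch(const managed_array_sketch&) = delete;
        managed_array_sketch& operator=(const managed_array_sketch&) = delete;
        T* get() const { return ptr_; }
        T& operator[](std::size_t i) { return ptr_[i]; }
        const T& operator[](std::size_t i) const { return ptr_[i]; }
    };

Unified memory is what lets the tests fill input_vector and read output_vector directly on the host without explicit cudaMemcpy calls.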
diff --git a/test/test_tgamma_float.cu b/test/test_tgamma_float.cu
new file mode 100644
index 0000000000..cb2d01482d
--- /dev/null
+++ b/test/test_tgamma_float.cu
@@ -0,0 +1,102 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::tgamma(in[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 1024;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::tgamma(input_vector[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_tgamma_ratio_double.cu b/test/test_tgamma_ratio_double.cu
new file mode 100644
index 0000000000..059e1c3c67
--- /dev/null
+++ b/test/test_tgamma_ratio_double.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::tgamma_ratio(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::tgamma_ratio(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
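Why a dedicated tgamma_ratio(a, b) rather than tgamma(a)/tgamma(b): the intermediate gamma values overflow long before the ratio itself does. A short host-only illustration (the particular arguments are chosen here just to trigger the overflow; they are not from the test above):

    #include <boost/math/special_functions/gamma.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        // tgamma(180.0) overflows a double (~1.8e308 max), but the
        // ratio Gamma(180)/Gamma(178.5) is a perfectly ordinary number.
        const double a = 180.0, b = 178.5;
        const double via_logs = std::exp(std::lgamma(a) - std::lgamma(b));
        const double direct   = boost::math::tgamma_ratio(a, b);
        std::cout << via_logs << " vs " << direct << '\n';
    }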
diff --git a/test/test_tgamma_ratio_float.cu b/test/test_tgamma_ratio_float.cu
new file mode 100644
index 0000000000..dc669bd7fb
--- /dev/null
+++ b/test/test_tgamma_ratio_float.cu
@@ -0,0 +1,104 @@
+
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = boost::math::tgamma_ratio(in1[i], in2[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed input vector B
+    cuda_managed_ptr<float_type> input_vector2(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = rand()/(float_type)RAND_MAX;
+        input_vector2[i] = rand()/(float_type)RAND_MAX;
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(boost::math::tgamma_ratio(input_vector1[i], input_vector2[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_tgamma_ratio_nvrtc_double.cpp b/test/test_tgamma_ratio_nvrtc_double.cpp
new file mode 100644
index 0000000000..5b0c3b1e67
--- /dev/null
+++ b/test/test_tgamma_ratio_nvrtc_double.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/gamma.hpp>
+extern "C" __global__
+void test_tgamma_ratio_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::tgamma_ratio(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_tgamma_ratio_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_tgamma_ratio_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_tgamma_ratio_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::tgamma_ratio(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
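All of the *_nvrtc_* tests in this diff repeat the same compile-to-PTX, load, launch sequence. Distilled into one helper it looks roughly like the sketch below; the function name is hypothetical and error handling is elided for brevity, so treat it as a shape, not a drop-in replacement for the tests' explicit checks:

    #include <nvrtc.h>
    #include <cuda.h>
    #include <string>
    #include <vector>

    // Compile CUDA C++ source to PTX at runtime with NVRTC. The returned
    // string is then fed to cuModuleLoadDataEx + cuModuleGetFunction.
    std::string compile_to_ptx(const char* src, const char* name,
                               const std::vector<const char*>& opts)
    {
        nvrtcProgram prog;
        nvrtcCreateProgram(&prog, src, name, 0, nullptr, nullptr);
        nvrtcCompileProgram(prog, static_cast<int>(opts.size()), opts.data());
        size_t n = 0;
        nvrtcGetPTXSize(prog, &n);
        std::string ptx(n, '\0');
        nvrtcGetPTX(prog, &ptx[0]);
        nvrtcDestroyProgram(&prog);
        return ptx;
    }

The driver-API half then mirrors the tests above: cuInit, cuCtxCreate, cuModuleLoadDataEx on the PTX, cuModuleGetFunction by the extern "C" kernel name, and cuLaunchKernel with a void* argument array.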
diff --git a/test/test_tgamma_ratio_nvrtc_float.cpp b/test/test_tgamma_ratio_nvrtc_float.cpp
new file mode 100644
index 0000000000..ab1bf339b4
--- /dev/null
+++ b/test/test_tgamma_ratio_nvrtc_float.cpp
@@ -0,0 +1,190 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/special_functions/gamma.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/special_functions/gamma.hpp>
+extern "C" __global__
+void test_tgamma_ratio_kernel(const float_type *in1, const float_type *in2, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = boost::math::tgamma_ratio(in1[i], in2[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_tgamma_ratio_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_tgamma_ratio_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_tgamma_ratio_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::tgamma_ratio(h_in1[i], h_in2[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_triangular.cpp b/test/test_triangular.cpp
index 8efb17d85c..d8f37b8520 100644
--- a/test/test_triangular.cpp
+++ b/test/test_triangular.cpp
@@ -8,21 +8,28 @@
 // test_triangular.cpp
 
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch.hpp>
+#endif
 
 #ifdef _MSC_VER
 #  pragma warning(disable: 4127) // conditional expression is constant.
 #  pragma warning(disable: 4305) // truncation from 'long double' to 'float'
 #endif
 
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 
 #include <boost/math/distributions/triangular.hpp>
 using boost::math::triangular_distribution;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include <boost/math/constants/constants.hpp>
 #include "test_out_of_range.hpp"
@@ -463,8 +470,11 @@ void test_spots(RealType)
   BOOST_CHECK_CLOSE_FRACTION(
     mode(tridef), static_cast<RealType>(0), tolerance);
   // skewness:
+  // On device the result does not get flushed exactly to zero so the eps difference is by default huge
+  #ifndef BOOST_MATH_HAS_GPU_SUPPORT
   BOOST_CHECK_CLOSE_FRACTION(
     median(tridef), static_cast<RealType>(0), tolerance);
+  #endif
   // https://reference.wolfram.com/language/ref/Skewness.html  skewness{-1, 0, +1} = 0
   // skewness[triangulardistribution{-1, 0, +1}] does not compute a result.
   // skewness[triangulardistribution{0, +1}] result == 0
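The test_triangular.cpp hunks above show the guard pattern this diff applies across the ported host tests: host-only machinery is fenced off from device builds, and checks that depend on exact flush-to-zero behaviour are skipped when GPU support is enabled. A condensed sketch of the pattern (macro names are the ones used in the patch):

    // Keep the precompiled header out of SYCL device compilation.
    #ifndef SYCL_LANGUAGE_VERSION
    #include <pch.hpp>
    #endif

    // Skip checks whose expected value is an exact zero: on device the
    // result is merely tiny, so an eps-based comparison would explode.
    #ifndef BOOST_MATH_HAS_GPU_SUPPORT
    // BOOST_CHECK_CLOSE_FRACTION(median(tridef), RealType(0), tolerance);
    #endif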
diff --git a/test/test_triangular_cdf_double.cu b/test/test_triangular_cdf_double.cu
new file mode 100644
index 0000000000..38affb91bd
--- /dev/null
+++ b/test/test_triangular_cdf_double.cu
@@ -0,0 +1,113 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(cdf(boost::math::triangular_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
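For reference while reading the triangular CDF kernels: the default triangular_distribution has lower = -1, mode = 0, upper = 1, and on the upper half of the support the CDF has the closed form cdf(x) = 1 - (1 - x)^2 / 2. A tiny host-only spot check of the value the kernel computes (the sample point 0.25 is arbitrary):

    #include <boost/math/distributions/triangular.hpp>
    #include <iostream>

    int main()
    {
        boost::math::triangular_distribution<double> tri; // (-1, 0, 1)
        const double x = 0.25;
        const double closed_form = 1.0 - (1.0 - x) * (1.0 - x) / 2.0; // 0.71875
        std::cout << cdf(tri, x) << " vs " << closed_form << '\n';
    }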
diff --git a/test/test_triangular_cdf_float.cu b/test/test_triangular_cdf_float.cu
new file mode 100644
index 0000000000..c1bb22bd3f
--- /dev/null
+++ b/test/test_triangular_cdf_float.cu
@@ -0,0 +1,113 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(cdf(boost::math::triangular_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_triangular_cdf_nvrtc_double.cpp b/test/test_triangular_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..f23009d92d
--- /dev/null
+++ b/test/test_triangular_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/triangular.hpp>
+extern "C" __global__
+void test_triangular_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_triangular_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_triangular_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_triangular_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::triangular_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_triangular_cdf_nvrtc_float.cpp b/test/test_triangular_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..e17f5c8146
--- /dev/null
+++ b/test/test_triangular_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/triangular.hpp>
+extern "C" __global__
+void test_triangular_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_triangular_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_triangular_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_triangular_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::triangular_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_triangular_pdf_double.cu b/test/test_triangular_pdf_double.cu
new file mode 100644
index 0000000000..38050faff8
--- /dev/null
+++ b/test/test_triangular_pdf_double.cu
@@ -0,0 +1,113 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(pdf(boost::math::triangular_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
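The PDF variant has an even simpler closed form to check against: on the default support [-1, 1] with mode 0, pdf(x) = 1 - |x|. A companion host-only spot check (again with an arbitrary sample point):

    #include <boost/math/distributions/triangular.hpp>
    #include <cmath>
    #include <iostream>

    int main()
    {
        boost::math::triangular_distribution<double> tri; // (-1, 0, 1)
        const double x = 0.25;
        std::cout << pdf(tri, x) << " vs " << 1.0 - std::fabs(x) << '\n'; // both 0.75
    }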
diff --git a/test/test_triangular_pdf_float.cu b/test/test_triangular_pdf_float.cu
new file mode 100644
index 0000000000..82e1be5fcc
--- /dev/null
+++ b/test/test_triangular_pdf_float.cu
@@ -0,0 +1,113 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch CUDA kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+       results.push_back(pdf(boost::math::triangular_distribution<float_type>(), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_triangular_pdf_nvrtc_double.cpp b/test/test_triangular_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..4fd23a51dc
--- /dev/null
+++ b/test/test_triangular_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/triangular.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/triangular.hpp>
+extern "C" __global__
+void test_triangular_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::triangular_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_triangular_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_triangular_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_triangular_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::triangular_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/distributions/triangular.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <cstdint> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <boost/math/distributions/triangular.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_triangular_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::triangular_distribution<float_type>(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_triangular_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_triangular_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_triangular_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast<float_type>(dist(rng)); + h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::triangular_distribution<float_type>(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_triangular_quan_double.cu b/test/test_triangular_quan_double.cu new file mode 100644 index 0000000000..5751ead020 --- /dev/null +++ b/test/test_triangular_quan_double.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <iomanip> +#include <vector> +#include <boost/math/distributions/triangular.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::triangular_distribution<float_type>(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::triangular_distribution<float_type>(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_triangular_quan_float.cu b/test/test_triangular_quan_float.cu new file mode 100644 index 0000000000..579e10fd54 --- /dev/null +++ b/test/test_triangular_quan_float.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include <iostream> +#include <iomanip> +#include <vector> +#include <boost/math/distributions/triangular.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::triangular_distribution<float_type>(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution<float_type> dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::triangular_distribution<float_type>(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_triangular_quan_nvrtc_double.cpp b/test/test_triangular_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..c41b3a11bc --- /dev/null +++ b/test/test_triangular_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/distributions/triangular.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <cstdint> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <boost/math/distributions/triangular.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_triangular_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::triangular_distribution<float_type>(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_triangular_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_triangular_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_triangular_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast<float_type>(dist(rng)); + h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::triangular_distribution<float_type>(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_triangular_quan_nvrtc_float.cpp b/test/test_triangular_quan_nvrtc_float.cpp new file mode 100644 index 0000000000..256a7f2d28 --- /dev/null +++ b/test/test_triangular_quan_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/distributions/triangular.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <cstdint> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <boost/math/distributions/triangular.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_triangular_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::triangular_distribution<float_type>(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_triangular_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_triangular_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_triangular_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = 
static_cast<float_type>(dist(rng)); + h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = quantile(boost::math::triangular_distribution<float_type>(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_trigamma.cpp b/test/test_trigamma.cpp index dd89898d76..85ba8078af 100644 --- a/test/test_trigamma.cpp +++ b/test/test_trigamma.cpp @@ -1,9 +1,13 @@ // (C) Copyright John Maddock 2014. +// (C) Copyright Matt Borland 2024. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef SYCL_LANGUAGE_VERSION #include <pch_light.hpp> +#endif + #include "test_trigamma.hpp" void expected_results() diff --git a/test/test_trigamma.hpp b/test/test_trigamma.hpp index 94a1290327..49b1bd5501 100644 --- a/test/test_trigamma.hpp +++ b/test/test_trigamma.hpp @@ -1,4 +1,5 @@ -// Copyright John Maddock 2014 +// Copyright John Maddock 2014 +// Copyright Matt Borland 2024 // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -9,9 +10,10 @@ #include <boost/math/concepts/real_concept.hpp> #define BOOST_TEST_MAIN #include <boost/test/unit_test.hpp> +#include <boost/math/special_functions/trigamma.hpp> +#include "../include_private/boost/math/tools/test.hpp" #include <boost/test/tools/floating_point_comparison.hpp> #include <boost/math/tools/stats.hpp> -#include <boost/math/tools/test.hpp> #include <boost/math/constants/constants.hpp> #include <boost/math/tools/big_constant.hpp> #include <boost/lexical_cast.hpp> diff --git a/test/test_trigamma_double.cu b/test/test_trigamma_double.cu new file mode 100644 index 0000000000..6780e3e924 --- /dev/null +++ b/test/test_trigamma_double.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <boost/math/special_functions/trigamma.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::trigamma(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::trigamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_trigamma_float.cu b/test/test_trigamma_float.cu new file mode 100644 index 0000000000..a407a0eb18 --- /dev/null +++ b/test/test_trigamma_float.cu @@ -0,0 +1,100 @@ + +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <vector> +#include <boost/math/special_functions/trigamma.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::trigamma(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(boost::math::trigamma(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_trigamma_nvrtc_double.cpp b/test/test_trigamma_nvrtc_double.cpp new file mode 100644 index 0000000000..46877acce1 --- /dev/null +++ b/test/test_trigamma_nvrtc_double.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/special_functions/trigamma.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <boost/math/special_functions/trigamma.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_trigamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::trigamma(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_trigamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_trigamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_trigamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); + h_in2[i] = 
static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::trigamma(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_trigamma_nvrtc_float.cpp b/test/test_trigamma_nvrtc_float.cpp new file mode 100644 index 0000000000..083c7d8767 --- /dev/null +++ b/test/test_trigamma_nvrtc_float.cpp @@ -0,0 +1,190 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/special_functions/trigamma.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <boost/math/special_functions/trigamma.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_trigamma_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::trigamma(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_trigamma_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_trigamma_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_trigamma_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); + h_in2[i] = 
static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::trigamma(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_trunc_double.cu b/test/test_trunc_double.cu new file mode 100644 index 0000000000..5a2d7b622b --- /dev/null +++ b/test/test_trunc_double.cu @@ -0,0 +1,97 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <iomanip> +#include <vector> +#include <boost/math/special_functions/trunc.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::trunc(in[i]) + boost::math::itrunc(in[i]) + boost::math::ltrunc(in[i]) + boost::math::lltrunc(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(4 * boost::math::trunc(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(h_C[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_trunc_float.cu b/test/test_trunc_float.cu new file mode 100644 index 0000000000..d6fe4d3525 --- /dev/null +++ b/test/test_trunc_float.cu @@ -0,0 +1,97 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <iostream> +#include <iomanip> +#include <vector> +#include <boost/math/special_functions/trunc.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::math::trunc(in[i]) + boost::math::itrunc(in[i]) + boost::math::ltrunc(in[i]) + boost::math::lltrunc(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> h_A(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> h_C(numElements); + + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + h_A[i] = rand()/(float_type)RAND_MAX; + } + + // Launch the CUDA Kernel + int threadsPerBlock = 1024; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(h_A.get(), h_C.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(4 * boost::math::trunc(h_A[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(h_C[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_trunc_nvrtc_double.cpp b/test/test_trunc_nvrtc_double.cpp new file mode 100644 index 0000000000..1aab64887b --- /dev/null +++ b/test/test_trunc_nvrtc_double.cpp @@ -0,0 +1,196 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/special_functions/trunc.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include <boost/math/special_functions/trunc.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_trunc_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::trunc(in1[i]) + + boost::math::itrunc(in1[i]) + + boost::math::ltrunc(in1[i]) + + boost::math::lltrunc(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_trunc_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_trunc_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_trunc_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < 
numElements; ++i) + { + h_in1[i] = static_cast<float_type>(dist(rng)); + h_in2[i] = static_cast<float_type>(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + const auto res = boost::math::trunc(h_in1[i]) + + boost::math::itrunc(h_in1[i]) + + boost::math::ltrunc(h_in1[i]) + + boost::math::lltrunc(h_in1[i]); + + if (std::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_trunc_nvrtc_float.cpp b/test/test_trunc_nvrtc_float.cpp new file mode 100644 index 0000000000..13ad4bc51b --- /dev/null +++ b/test/test_trunc_nvrtc_float.cpp @@ -0,0 +1,196 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include <cuda.h> +#include <cuda_runtime.h> +#include <nvrtc.h> + +#include <boost/math/special_functions/trunc.hpp> +#include <boost/math/special_functions/relative_difference.hpp> +#include <cmath> +#include <iostream> +#include <iomanip> +#include <vector> +#include <random> + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include <boost/math/special_functions/trunc.hpp> +#include <cuda/std/type_traits> +extern "C" __global__ +void test_trunc_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = boost::math::trunc(in1[i]) + + boost::math::itrunc(in1[i]) + + boost::math::ltrunc(in1[i]) + + boost::math::lltrunc(in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_trunc_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_trunc_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_trunc_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution<float_type> dist(0.0f, 1000.0f); + for (int i = 0; i < 
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            const auto res = boost::math::trunc(h_in1[i])
+                             + boost::math::itrunc(h_in1[i])
+                             + boost::math::ltrunc(h_in1[i])
+                             + boost::math::lltrunc(h_in1[i]);
+
+            if (std::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_uniform.cpp b/test/test_uniform.cpp
index 65d192a4d6..4e034cab74 100644
--- a/test/test_uniform.cpp
+++ b/test/test_uniform.cpp
@@ -8,21 +8,28 @@
 // test_uniform.cpp
+#ifndef SYCL_LANGUAGE_VERSION
 #include <pch.hpp>
+#endif
 #ifdef _MSC_VER
 # pragma warning(disable: 4127) // conditional expression is constant.
 # pragma warning(disable: 4100) // unreferenced formal parameter.
 #endif
+#include <boost/math/tools/config.hpp>
+
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
+
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/distributions/uniform.hpp>
 using boost::math::uniform_distribution;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include "test_out_of_range.hpp"
 #include <iostream>
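The uniform-distribution tests that follow all verify the device result against a host-side evaluation of the same Boost.Math call. On [a, b] the quantities involved have simple closed forms: pdf(x) = 1/(b - a) on the support, cdf(x) = (x - a)/(b - a) clamped to [0, 1], and quantile(p) = a + p(b - a). A standalone host-only check of those identities (a sketch, independent of the CUDA tests):

    #include <boost/math/distributions/uniform.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        boost::math::uniform_distribution<double> u(0.0, 1.0); // a = 0, b = 1
        assert(std::abs(pdf(u, 0.25) - 1.0) < 1e-15);          // 1 / (b - a)
        assert(std::abs(cdf(u, 0.25) - 0.25) < 1e-15);         // (x - a) / (b - a)
        assert(std::abs(quantile(u, 0.25) - 0.25) < 1e-15);    // a + p * (b - a)
    }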
diff --git a/test/test_uniform_cdf_double.cu b/test/test_uniform_cdf_double.cu
new file mode 100644
index 0000000000..beb98c34dd
--- /dev/null
+++ b/test/test_uniform_cdf_double.cu
@@ -0,0 +1,113 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/uniform.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::uniform_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::uniform_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
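cuda_managed_ptr.hpp and stopwatch.hpp are pre-existing test helpers and are not part of this diff. The tests rely on only two properties of cuda_managed_ptr: the allocation is reachable from both host and device, and .get() exposes the raw pointer for the kernel launch. A minimal sketch of such a wrapper built on cudaMallocManaged (hypothetical; the real header may differ):

    #include <cuda_runtime.h>
    #include <cstddef>

    template <class T>
    class cuda_managed_ptr
    {
        T* ptr_ = nullptr;
    public:
        // Unified memory: the same pointer is valid on host and device.
        explicit cuda_managed_ptr(std::size_t n) { cudaMallocManaged(&ptr_, n * sizeof(T)); }
        ~cuda_managed_ptr() { cudaFree(ptr_); }
        cuda_managed_ptr(const cuda_managed_ptr&) = delete;
        cuda_managed_ptr& operator=(const cuda_managed_ptr&) = delete;
        T* get() const { return ptr_; }
        T& operator[](std::size_t i) const { return ptr_[i]; }
    };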
diff --git a/test/test_uniform_cdf_float.cu b/test/test_uniform_cdf_float.cu
new file mode 100644
index 0000000000..7aef4a6be1
--- /dev/null
+++ b/test/test_uniform_cdf_float.cu
@@ -0,0 +1,113 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <boost/math/distributions/uniform.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include <cmath>
+#include <iostream>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::uniform_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try{
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the Vector Add CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::uniform_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (std::isfinite(results[i]))
+        {
+            if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
diff --git a/test/test_uniform_cdf_nvrtc_double.cpp b/test/test_uniform_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..1b6b563a05
--- /dev/null
+++ b/test/test_uniform_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef double float_type; + +const char* cuda_kernel = R"( +typedef double float_type; +#include +#include +extern "C" __global__ +void test_uniform_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::uniform_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_uniform_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_uniform_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_uniform_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::uniform_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_uniform_cdf_nvrtc_float.cpp b/test/test_uniform_cdf_nvrtc_float.cpp new file mode 100644 index 0000000000..6ba98900f9 --- /dev/null +++ b/test/test_uniform_cdf_nvrtc_float.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_uniform_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = cdf(boost::math::uniform_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_uniform_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_uniform_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_uniform_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = cdf(boost::math::uniform_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_uniform_pdf_double.cu b/test/test_uniform_pdf_double.cu new file mode 100644 index 0000000000..6b1cf83e0d --- /dev/null +++ b/test/test_uniform_pdf_double.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::uniform_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::uniform_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_uniform_pdf_float.cu b/test/test_uniform_pdf_float.cu new file mode 100644 index 0000000000..4b003d22a1 --- /dev/null +++ b/test/test_uniform_pdf_float.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = pdf(boost::math::uniform_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(pdf(boost::math::uniform_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_uniform_pdf_nvrtc_double.cpp b/test/test_uniform_pdf_nvrtc_double.cpp new file mode 100644 index 0000000000..f638c43961 --- /dev/null +++ b/test/test_uniform_pdf_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/uniform.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include
+#include <boost/math/distributions/uniform.hpp>
+extern "C" __global__
+void test_uniform_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::uniform_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_uniform_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_uniform_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_uniform_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::uniform_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
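Every *_nvrtc_* test in this diff repeats the same driver-API sequence: create a program from the kernel source string, compile it, fetch the PTX, load the PTX as a module, and look up the extern "C" entry point. Condensed into one helper it looks roughly like this (a sketch with error checking elided; the tests above check every return code instead):

    #include <cuda.h>
    #include <nvrtc.h>
    #include <vector>

    CUfunction load_kernel(const char* src, const char* name, const char** opts, int numOpts)
    {
        // 1-2: create and compile the runtime program.
        nvrtcProgram prog;
        nvrtcCreateProgram(&prog, src, "kernel.cu", 0, nullptr, nullptr);
        nvrtcCompileProgram(prog, numOpts, opts);

        // 3: extract the generated PTX.
        size_t ptx_size;
        nvrtcGetPTXSize(prog, &ptx_size);
        std::vector<char> ptx(ptx_size);
        nvrtcGetPTX(prog, ptx.data());
        nvrtcDestroyProgram(&prog);

        // 4-5: load the PTX and resolve the kernel entry point.
        CUmodule module;
        CUfunction kernel;
        cuModuleLoadDataEx(&module, ptx.data(), 0, nullptr, nullptr);
        cuModuleGetFunction(&kernel, module, name);
        return kernel;
    }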
diff --git a/test/test_uniform_pdf_nvrtc_float.cpp b/test/test_uniform_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..81da44417a
--- /dev/null
+++ b/test/test_uniform_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_uniform_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = pdf(boost::math::uniform_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_uniform_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_uniform_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_uniform_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast(dist(rng)); + } + + checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1"); + checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2"); + checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out"); + + checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1"); + checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2"); + + int blockSize = 256; + int numBlocks = (numElements + blockSize - 1) / blockSize; + void* args[] = { &d_in1, &d_in2, &d_out, &numElements }; + checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed"); + + checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out"); + + // Verify Result + for (int i = 0; i < numElements; ++i) + { + auto res = pdf(boost::math::uniform_distribution(), h_in1[i]); + + if (boost::math::isfinite(res)) + { + if (boost::math::epsilon_difference(res, h_out[i]) > 300) + { + std::cout << "error at line: " << i + << "\nParallel: " << h_out[i] + << "\n Serial: " << res + << "\n Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl; + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); + delete[] h_in1; + delete[] h_in2; + delete[] h_out; + + nvrtcDestroyProgram(&prog); + delete[] ptx; + + cuCtxDestroy(context); + + std::cout << "Kernel executed successfully." << std::endl; + return 0; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/test/test_uniform_quan_double.cu b/test/test_uniform_quan_double.cu new file mode 100644 index 0000000000..ab11374754 --- /dev/null +++ b/test/test_uniform_quan_double.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::uniform_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::uniform_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_uniform_quan_float.cu b/test/test_uniform_quan_float.cu new file mode 100644 index 0000000000..7a7e4ccf50 --- /dev/null +++ b/test/test_uniform_quan_float.cu @@ -0,0 +1,113 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +typedef float float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in1, float_type *out, int numElements) +{ + using std::cos; + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = quantile(boost::math::uniform_distribution(1), in1[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + try{ + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr input_vector1(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + boost::random::mt19937 gen; + boost::random::uniform_real_distribution dist; + // Initialize the input vectors + for (int i = 0; i < numElements; ++i) + { + input_vector1[i] = dist(gen); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(quantile(boost::math::uniform_distribution(1), input_vector1[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (std::isfinite(results[i])) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl; + return EXIT_FAILURE; + } + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + } + catch(const std::exception& e) + { + std::cerr << "Stopped with exception: " << e.what() << std::endl; + } + return 0; +} diff --git a/test/test_uniform_quan_nvrtc_double.cpp b/test/test_uniform_quan_nvrtc_double.cpp new file mode 100644 index 0000000000..13b8c6d230 --- /dev/null +++ b/test/test_uniform_quan_nvrtc_double.cpp @@ -0,0 +1,191 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nvrtc.h>
+
+#include <boost/math/distributions/uniform.hpp>
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <cmath>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include
+#include <boost/math/distributions/uniform.hpp>
+extern "C" __global__
+void test_uniform_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::uniform_distribution<float_type>(), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_uniform_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_uniform_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_uniform_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::uniform_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
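For the default uniform distribution on [0, 1] the quantile function is the exact inverse of the CDF, so the two kernels exercised in this diff can also be cross-checked against each other on the host. A sketch using the same epsilon_difference tolerance style as the tests:

    #include <boost/math/distributions/uniform.hpp>
    #include <boost/math/special_functions/relative_difference.hpp>
    #include <iostream>

    int main()
    {
        boost::math::uniform_distribution<double> u; // [0, 1]
        for (double p = 0.05; p < 1.0; p += 0.05)
        {
            // quantile is the inverse CDF, so the round trip should be exact to rounding.
            const double rt = cdf(u, quantile(u, p));
            if (boost::math::epsilon_difference(rt, p) > 2)
                std::cout << "round-trip failure at p = " << p << '\n';
        }
    }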
diff --git a/test/test_uniform_quan_nvrtc_float.cpp b/test/test_uniform_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..57372dee3f
--- /dev/null
+++ b/test/test_uniform_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+// Copyright John Maddock 2016.
+// Copyright Matt Borland 2024.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error +#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false + +// Must be included first +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float float_type; + +const char* cuda_kernel = R"( +typedef float float_type; +#include +#include +extern "C" __global__ +void test_uniform_kernel(const float_type *in1, const float_type*, float_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + if (i < numElements) + { + out[i] = quantile(boost::math::uniform_distribution(), in1[i]); + } +} +)"; + +void checkCUDAError(cudaError_t result, const char* msg) +{ + if (result != cudaSuccess) + { + std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkCUError(CUresult result, const char* msg) +{ + if (result != CUDA_SUCCESS) + { + const char* errorStr; + cuGetErrorString(result, &errorStr); + std::cerr << msg << ": " << errorStr << std::endl; + exit(EXIT_FAILURE); + } +} + +void checkNVRTCError(nvrtcResult result, const char* msg) +{ + if (result != NVRTC_SUCCESS) + { + std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl; + exit(EXIT_FAILURE); + } +} + +int main() +{ + try + { + // Initialize CUDA driver API + checkCUError(cuInit(0), "Failed to initialize CUDA"); + + // Create CUDA context + CUcontext context; + CUdevice device; + checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device"); + checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context"); + + nvrtcProgram prog; + nvrtcResult res; + + res = nvrtcCreateProgram(&prog, cuda_kernel, "test_uniform_kernel.cu", 0, nullptr, nullptr); + checkNVRTCError(res, "Failed to create NVRTC program"); + + nvrtcAddNameExpression(prog, "test_uniform_kernel"); + + #ifdef BOOST_MATH_NVRTC_CI_RUN + const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #else + const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"}; + #endif + + // Compile the program + res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts); + if (res != NVRTC_SUCCESS) + { + size_t log_size; + nvrtcGetProgramLogSize(prog, &log_size); + char* log = new char[log_size]; + nvrtcGetProgramLog(prog, log); + std::cerr << "Compilation failed:\n" << log << std::endl; + delete[] log; + exit(EXIT_FAILURE); + } + + // Get PTX from the program + size_t ptx_size; + nvrtcGetPTXSize(prog, &ptx_size); + char* ptx = new char[ptx_size]; + nvrtcGetPTX(prog, ptx); + + // Load PTX into CUDA module + CUmodule module; + CUfunction kernel; + checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module"); + checkCUError(cuModuleGetFunction(&kernel, module, "test_uniform_kernel"), "Failed to get kernel function"); + + int numElements = 5000; + float_type *h_in1, *h_in2, *h_out; + float_type *d_in1, *d_in2, *d_out; + + // Allocate memory on the host + h_in1 = new float_type[numElements]; + h_in2 = new float_type[numElements]; + h_out = new float_type[numElements]; + + // Initialize input arrays + std::mt19937_64 rng(42); + std::uniform_real_distribution dist(0.0f, 1.0f); + for (int i = 0; i < numElements; ++i) + { + h_in1[i] = static_cast(dist(rng)); + 
h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::uniform_distribution<float_type>(), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_weibull.cpp b/test/test_weibull.cpp
index 4b31a7f0b7..dc509b742b 100644
--- a/test/test_weibull.cpp
+++ b/test/test_weibull.cpp
@@ -12,15 +12,17 @@
 # pragma warning (disable : 4127) // conditional expression is constant.
 #endif
-
+#include <boost/math/tools/config.hpp>
+#ifndef BOOST_MATH_NO_REAL_CONCEPT_TESTS
 #include <boost/math/concepts/real_concept.hpp> // for real_concept
+#endif
 #define BOOST_TEST_MAIN
 #include <boost/test/unit_test.hpp> // Boost.Test
 #include <boost/test/tools/floating_point_comparison.hpp>
 #include <boost/math/distributions/weibull.hpp>
 using boost::math::weibull_distribution;
-#include <boost/math/tools/test.hpp>
+#include "../include_private/boost/math/tools/test.hpp"
 #include "test_out_of_range.hpp"
 #include <iostream>
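The Weibull CDF evaluated in the following .cu tests has the closed form F(x; k, λ) = 1 − exp(−(x/λ)^k), and with shape k = 1 (the weibull_distribution<float_type>(1) used by the kernels) it reduces to the exponential CDF 1 − e^(−x). A quick host-side sanity check of that identity (a sketch):

    #include <boost/math/distributions/weibull.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        // shape k = 1, default scale 1: Weibull reduces to Exponential(1).
        boost::math::weibull_distribution<double> w(1.0);
        const double x = 0.5;
        assert(std::abs(cdf(w, x) - (1.0 - std::exp(-x))) < 1e-15);
    }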
diff --git a/test/test_weibull_cdf_double.cu b/test/test_weibull_cdf_double.cu
new file mode 100644
index 0000000000..1b2e5cf0db
--- /dev/null
+++ b/test/test_weibull_cdf_double.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::weibull_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_weibull_cdf_float.cu b/test/test_weibull_cdf_float.cu
new file mode 100644
index 0000000000..76bf3a4e1c
--- /dev/null
+++ b/test/test_weibull_cdf_float.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(cdf(boost::math::weibull_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
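A note on the launch configuration every test above and below uses: `(numElements + threadsPerBlock - 1) / threadsPerBlock` is integer ceiling division, so the last, possibly partial, block is still scheduled, and the `i < numElements` guard in the kernel masks off the excess threads. A minimal host-only sketch (not part of this patch; the helper name `blocks_for` is purely illustrative):

    // Ceiling-division grid sizing, as used by the kernel launches in this patch.
    #include <cassert>

    int blocks_for(int numElements, int threadsPerBlock)
    {
        return (numElements + threadsPerBlock - 1) / threadsPerBlock;
    }

    int main()
    {
        assert(blocks_for(50000, 256) == 196); // 196 * 256 = 50176 >= 50000
        assert(blocks_for(50176, 256) == 196); // exact multiple: no extra block
        assert(blocks_for(50177, 256) == 197); // one element over: one more block
    }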
diff --git a/test/test_weibull_cdf_nvrtc_double.cpp b/test/test_weibull_cdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..60d5ff5afb
--- /dev/null
+++ b/test/test_weibull_cdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <string>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/weibull.hpp>
+extern "C" __global__
+void test_weibull_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_weibull_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_weibull_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_weibull_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::weibull_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_weibull_cdf_nvrtc_float.cpp b/test/test_weibull_cdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..5085b2f7dd
--- /dev/null
+++ b/test/test_weibull_cdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <string>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/weibull.hpp>
+extern "C" __global__
+void test_weibull_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = cdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_weibull_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_weibull_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_weibull_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = cdf(boost::math::weibull_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
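The pass/fail thresholds above (`> 100.0` in the runtime tests, `> 300` in the NVRTC tests) are expressed in multiples of machine epsilon via boost::math::epsilon_difference. A rough host-only sketch of that measure (not part of this patch, and deliberately ignoring the zero/infinity/denorm special cases the real boost::math::relative_difference handles):

    // Illustrative only: disagreement between two values in units of epsilon.
    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <limits>

    template <typename T>
    T eps_difference_sketch(T a, T b)
    {
        const T rel = std::fabs(a - b) / (std::min)(std::fabs(a), std::fabs(b));
        return rel / std::numeric_limits<T>::epsilon(); // "how many epsilons apart"
    }

    int main()
    {
        const double serial   = 0.632120558828558; // stand-in CPU reference value
        const double parallel = serial * (1.0 + 50 * std::numeric_limits<double>::epsilon());
        std::cout << eps_difference_sketch(serial, parallel) << '\n'; // ~50, under the 100 threshold
    }

Read this way, the checks assert that GPU and CPU results agree to within 100 (or 300) epsilon-scale units, rather than to an absolute tolerance.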
diff --git a/test/test_weibull_pdf_double.cu b/test/test_weibull_pdf_double.cu
new file mode 100644
index 0000000000..dd48b57d60
--- /dev/null
+++ b/test/test_weibull_pdf_double.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::weibull_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_weibull_pdf_float.cu b/test/test_weibull_pdf_float.cu
new file mode 100644
index 0000000000..40064b1ed7
--- /dev/null
+++ b/test/test_weibull_pdf_float.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(pdf(boost::math::weibull_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_weibull_pdf_nvrtc_double.cpp b/test/test_weibull_pdf_nvrtc_double.cpp
new file mode 100644
index 0000000000..2e5e237b20
--- /dev/null
+++ b/test/test_weibull_pdf_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <string>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/weibull.hpp>
+extern "C" __global__
+void test_weibull_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_weibull_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_weibull_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_weibull_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::weibull_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_weibull_pdf_nvrtc_float.cpp b/test/test_weibull_pdf_nvrtc_float.cpp
new file mode 100644
index 0000000000..6c3c5202c1
--- /dev/null
+++ b/test/test_weibull_pdf_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <string>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/weibull.hpp>
+extern "C" __global__
+void test_weibull_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = pdf(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_weibull_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_weibull_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_weibull_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = pdf(boost::math::weibull_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_weibull_quan_double.cu b/test/test_weibull_quan_double.cu
new file mode 100644
index 0000000000..9263fb5365
--- /dev/null
+++ b/test/test_weibull_quan_double.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef double float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::weibull_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test_weibull_quan_float.cu b/test/test_weibull_quan_float.cu
new file mode 100644
index 0000000000..5dd6bd6eef
--- /dev/null
+++ b/test/test_weibull_quan_float.cu
@@ -0,0 +1,109 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+typedef float float_type;
+
+/**
+ * CUDA Kernel Device code
+ *
+ */
+__global__ void cuda_test(const float_type *in1, float_type *out, int numElements)
+{
+    using std::cos;
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+
+/**
+ * Host main routine
+ */
+int main(void)
+{
+    try {
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector A
+    cuda_managed_ptr<float_type> input_vector1(numElements);
+
+    // Allocate the managed output vector C
+    cuda_managed_ptr<float_type> output_vector(numElements);
+
+    boost::random::mt19937 gen;
+    boost::random::uniform_real_distribution<float_type> dist;
+    // Initialize the input vectors
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector1[i] = dist(gen);
+    }
+
+    // Launch the test CUDA kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernel done in " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<float_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+        results.push_back(quantile(boost::math::weibull_distribution<float_type>(1), input_vector1[i]));
+    double t = w.elapsed();
+    // check the results
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 100.0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            std::cerr << "Error rate was: " << boost::math::epsilon_difference(output_vector[i], results[i]) << "eps" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+    }
+    return 0;
+}
\ No newline at end of file
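Because the quantile kernels use shape k = 1 with the default scale, the value being computed on the device reduces to the exponential quantile -ln(1 - p). A host-only sanity check (not part of this patch) that makes the reduction concrete:

    // Weibull(shape = 1, scale = 1) quantile equals the unit exponential quantile.
    #include <boost/math/distributions/weibull.hpp>
    #include <cassert>
    #include <cmath>

    int main()
    {
        const boost::math::weibull_distribution<double> w(1);
        for (double p : {0.1, 0.5, 0.9})
        {
            // -log1p(-p) == -ln(1 - p), computed accurately for small p
            assert(std::fabs(quantile(w, p) - (-std::log1p(-p))) < 1e-12);
        }
    }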
diff --git a/test/test_weibull_quan_nvrtc_double.cpp b/test/test_weibull_quan_nvrtc_double.cpp
new file mode 100644
index 0000000000..aed31865e8
--- /dev/null
+++ b/test/test_weibull_quan_nvrtc_double.cpp
@@ -0,0 +1,191 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <string>
+
+typedef double float_type;
+
+const char* cuda_kernel = R"(
+typedef double float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/weibull.hpp>
+extern "C" __global__
+void test_weibull_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_weibull_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_weibull_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_weibull_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::weibull_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
diff --git a/test/test_weibull_quan_nvrtc_float.cpp b/test/test_weibull_quan_nvrtc_float.cpp
new file mode 100644
index 0000000000..98997b354b
--- /dev/null
+++ b/test/test_weibull_quan_nvrtc_float.cpp
@@ -0,0 +1,191 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#define BOOST_MATH_OVERFLOW_ERROR_POLICY ignore_error
+#define BOOST_MATH_PROMOTE_DOUBLE_POLICY false
+
+// Must be included first
+#include <nvrtc.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include <boost/math/distributions/weibull.hpp>
+#include <boost/math/special_functions/relative_difference.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <exception>
+#include <string>
+
+typedef float float_type;
+
+const char* cuda_kernel = R"(
+typedef float float_type;
+#include <cuda/std/type_traits>
+#include <boost/math/distributions/weibull.hpp>
+extern "C" __global__
+void test_weibull_kernel(const float_type *in1, const float_type*, float_type *out, int numElements)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    if (i < numElements)
+    {
+        out[i] = quantile(boost::math::weibull_distribution<float_type>(1), in1[i]);
+    }
+}
+)";
+
+void checkCUDAError(cudaError_t result, const char* msg)
+{
+    if (result != cudaSuccess)
+    {
+        std::cerr << msg << ": " << cudaGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkCUError(CUresult result, const char* msg)
+{
+    if (result != CUDA_SUCCESS)
+    {
+        const char* errorStr;
+        cuGetErrorString(result, &errorStr);
+        std::cerr << msg << ": " << errorStr << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+void checkNVRTCError(nvrtcResult result, const char* msg)
+{
+    if (result != NVRTC_SUCCESS)
+    {
+        std::cerr << msg << ": " << nvrtcGetErrorString(result) << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Initialize CUDA driver API
+        checkCUError(cuInit(0), "Failed to initialize CUDA");
+
+        // Create CUDA context
+        CUcontext context;
+        CUdevice device;
+        checkCUError(cuDeviceGet(&device, 0), "Failed to get CUDA device");
+        checkCUError(cuCtxCreate(&context, 0, device), "Failed to create CUDA context");
+
+        nvrtcProgram prog;
+        nvrtcResult res;
+
+        res = nvrtcCreateProgram(&prog, cuda_kernel, "test_weibull_kernel.cu", 0, nullptr, nullptr);
+        checkNVRTCError(res, "Failed to create NVRTC program");
+
+        nvrtcAddNameExpression(prog, "test_weibull_kernel");
+
+        #ifdef BOOST_MATH_NVRTC_CI_RUN
+        const char* opts[] = {"--std=c++14", "--gpu-architecture=compute_75", "--include-path=/home/runner/work/cuda-math/boost-root/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #else
+        const char* opts[] = {"--std=c++14", "--include-path=/home/mborland/Documents/boost/libs/cuda-math/include/", "-I/usr/local/cuda/include"};
+        #endif
+
+        // Compile the program
+        res = nvrtcCompileProgram(prog, sizeof(opts) / sizeof(const char*), opts);
+        if (res != NVRTC_SUCCESS)
+        {
+            size_t log_size;
+            nvrtcGetProgramLogSize(prog, &log_size);
+            char* log = new char[log_size];
+            nvrtcGetProgramLog(prog, log);
+            std::cerr << "Compilation failed:\n" << log << std::endl;
+            delete[] log;
+            exit(EXIT_FAILURE);
+        }
+
+        // Get PTX from the program
+        size_t ptx_size;
+        nvrtcGetPTXSize(prog, &ptx_size);
+        char* ptx = new char[ptx_size];
+        nvrtcGetPTX(prog, ptx);
+
+        // Load PTX into CUDA module
+        CUmodule module;
+        CUfunction kernel;
+        checkCUError(cuModuleLoadDataEx(&module, ptx, 0, 0, 0), "Failed to load module");
+        checkCUError(cuModuleGetFunction(&kernel, module, "test_weibull_kernel"), "Failed to get kernel function");
+
+        int numElements = 5000;
+        float_type *h_in1, *h_in2, *h_out;
+        float_type *d_in1, *d_in2, *d_out;
+
+        // Allocate memory on the host
+        h_in1 = new float_type[numElements];
+        h_in2 = new float_type[numElements];
+        h_out = new float_type[numElements];
+
+        // Initialize input arrays
+        std::mt19937_64 rng(42);
+        std::uniform_real_distribution<float_type> dist(0.0f, 1.0f);
+        for (int i = 0; i < numElements; ++i)
+        {
+            h_in1[i] = static_cast<float_type>(dist(rng));
+            h_in2[i] = static_cast<float_type>(dist(rng));
+        }
+
+        checkCUDAError(cudaMalloc(&d_in1, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in1");
+        checkCUDAError(cudaMalloc(&d_in2, numElements * sizeof(float_type)), "Failed to allocate device memory for d_in2");
+        checkCUDAError(cudaMalloc(&d_out, numElements * sizeof(float_type)), "Failed to allocate device memory for d_out");
+
+        checkCUDAError(cudaMemcpy(d_in1, h_in1, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in1");
+        checkCUDAError(cudaMemcpy(d_in2, h_in2, numElements * sizeof(float_type), cudaMemcpyHostToDevice), "Failed to copy data to device for d_in2");
+
+        int blockSize = 256;
+        int numBlocks = (numElements + blockSize - 1) / blockSize;
+        void* args[] = { &d_in1, &d_in2, &d_out, &numElements };
+        checkCUError(cuLaunchKernel(kernel, numBlocks, 1, 1, blockSize, 1, 1, 0, 0, args, 0), "Kernel launch failed");
+
+        checkCUDAError(cudaMemcpy(h_out, d_out, numElements * sizeof(float_type), cudaMemcpyDeviceToHost), "Failed to copy data back to host for h_out");
+
+        // Verify Result
+        for (int i = 0; i < numElements; ++i)
+        {
+            auto res = quantile(boost::math::weibull_distribution<float_type>(1), h_in1[i]);
+
+            if (boost::math::isfinite(res))
+            {
+                if (boost::math::epsilon_difference(res, h_out[i]) > 300)
+                {
+                    std::cout << "error at line: " << i
+                              << "\nParallel: " << h_out[i]
+                              << "\n  Serial: " << res
+                              << "\n    Dist: " << boost::math::epsilon_difference(res, h_out[i]) << std::endl;
+                }
+            }
+        }
+
+        cudaFree(d_in1);
+        cudaFree(d_in2);
+        cudaFree(d_out);
+        delete[] h_in1;
+        delete[] h_in2;
+        delete[] h_out;
+
+        nvrtcDestroyProgram(&prog);
+        delete[] ptx;
+
+        cuCtxDestroy(context);
+
+        std::cout << "Kernel executed successfully." << std::endl;
+        return 0;
+    }
+    catch(const std::exception& e)
+    {
+        std::cerr << "Stopped with exception: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+}
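Every test file above defines BOOST_MATH_OVERFLOW_ERROR_POLICY as ignore_error before any Boost.Math include, presumably because device code cannot throw, so overflows have to come back as infinities rather than exceptions. A host-side sketch of the same effect applied per-distribution via the policy API instead of the macro (illustrative only; based on my reading of the Boost.Math policy docs, not on anything in this patch):

    // Under ignore_error, quantile at p == 1 returns +infinity instead of
    // throwing; the default policy would raise an overflow error here.
    #include <boost/math/distributions/weibull.hpp>
    #include <boost/math/policies/policy.hpp>

    using ignore_overflow = boost::math::policies::policy<
        boost::math::policies::overflow_error<boost::math::policies::ignore_error>>;

    int main()
    {
        boost::math::weibull_distribution<double, ignore_overflow> w(1);
        return quantile(w, 1.0) > 0 ? 0 : 1; // +inf, no exception thrown
    }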
      <include>../include_private
+     <library>/boost/multiprecision//boost_multiprecision
+     <library>/boost/algorithm//boost_algorithm
    ;

-lib gmp ;
-lib mpfr ;
-lib mpfi ;
-lib quadmath ;
+searched-lib gmp : : shared ;
+searched-lib mpfr : : shared ;
+searched-lib mpfi : : shared ;
+searched-lib quadmath : : shared ;

 exe bessel_data : bessel_data.cpp :
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install bessel_data_install : bessel_data : bin ;

-exe ellint_f_data : ellint_f_data.cpp ;
+exe ellint_f_data : ellint_f_data.cpp /boost/test//included ;
 install ellint_f_data_install : ellint_f_data : bin ;

-exe heuman_lambda_data : heuman_lambda_data.cpp ;
+exe heuman_lambda_data : heuman_lambda_data.cpp /boost/test//included ;
 install heuman_lambda_data_install : heuman_lambda_data : bin ;

 exe hyp_2f2_data : hyp_2f2_data.cpp ;
@@ -56,24 +58,24 @@ install hyp_2f2_data_install : hyp_2f2_data : bin ;
 exe laguerre_data : laguerre_data.cpp ;
 install laguerre_data_install : laguerre_data : bin ;

-exe bessel_derivative_data : bessel_derivative_data.cpp :
-[ check-target-builds ../../multiprecision/config//has_gmp : <library>gmp : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
+exe bessel_derivative_data : bessel_derivative_data.cpp /boost/test//included :
+[ check-target-builds /boost/multiprecision/config//has_gmp : <library>gmp : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install bessel_derivative_data_install : bessel_derivative_data : bin ;

-exe ellint_k_data : ellint_k_data.cpp ;
+exe ellint_k_data : ellint_k_data.cpp /boost/test//included ;
 install ellint_k_data_install : ellint_k_data : bin ;

 exe hyp_0f2_data : hyp_0f2_data.cpp ;
 install hyp_0f2_data_install : hyp_0f2_data : bin ;

-exe hypergeometric_dist_data : hypergeometric_dist_data.cpp : 
-[ check-target-builds ../config//is_ci_standalone_run : <build>no ] 
+exe hypergeometric_dist_data : hypergeometric_dist_data.cpp :
+[ check-target-builds ../config//is_ci_standalone_run : <build>no ]
 [ requires cxx11_hdr_random ] ;
 install hypergeometric_dist_data_install : hypergeometric_dist_data : bin ;

-exe legendre_data : legendre_data.cpp : 
+exe legendre_data : legendre_data.cpp :
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install legendre_data_install : legendre_data : bin ;

@@ -81,13 +83,13 @@ exe beta_data : beta_data.cpp :
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install beta_data_install : beta_data : bin ;

-exe ellint_pi2_data : ellint_pi2_data.cpp ;
+exe ellint_pi2_data : ellint_pi2_data.cpp /boost/test//included ;
 install ellint_pi2_data_install : ellint_pi2_data : bin ;

-exe hyp_1f1_big_data : hyp_1f1_big_data.cpp : 
-[ check-target-builds ../../multiprecision/config//has_gmp : <library>gmp : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
+exe hyp_1f1_big_data : hyp_1f1_big_data.cpp :
+[ check-target-builds /boost/multiprecision/config//has_gmp : <library>gmp : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
 [ requires cxx11_decltype ] ;
 install hyp_1f1_big_data_install : hyp_1f1_big_data : bin ;

@@ -98,68 +100,68 @@ install ibeta_data_install : ibeta_data : bin ;
 exe log1p_expm1_data : log1p_expm1_data.cpp ;
 install log1p_expm1_data_install : log1p_expm1_data : bin ;

-exe carlson_ellint_data : carlson_ellint_data.cpp ;
+exe carlson_ellint_data : carlson_ellint_data.cpp /boost/test//included ;
 install carlson_ellint_data_install : carlson_ellint_data : bin ;

-exe ellint_pi3_data : ellint_pi3_data.cpp ;
+exe ellint_pi3_data : ellint_pi3_data.cpp /boost/test//included ;
 install ellint_pi3_data_install : ellint_pi3_data : bin ;

-exe hyp_1f1_data : hyp_1f1_data.cpp : 
-[ check-target-builds ../../multiprecision/config//has_gmp : <library>gmp : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
+exe hyp_1f1_data : hyp_1f1_data.cpp :
+[ check-target-builds /boost/multiprecision/config//has_gmp : <library>gmp : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
 [ requires cxx11_decltype ] ;
 install hyp_1f1_data_install : hyp_1f1_data : bin ;

-exe ibeta_derivative_data : ibeta_derivative_data.cpp : 
+exe ibeta_derivative_data : ibeta_derivative_data.cpp /boost/math//testing :
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install ibeta_derivative_data_install : ibeta_derivative_data : bin ;

-exe sinc_data : sinc_data.cpp ;
+exe sinc_data : sinc_data.cpp /boost/test//included ;
 install sinc_data_install : sinc_data : bin ;

 exe cbrt_data : cbrt_data.cpp ;
 install cbrt_data_install : cbrt_data : bin ;

-exe erf_data : erf_data.cpp : [ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
+exe erf_data : erf_data.cpp : [ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
 install erf_data_install : erf_data : bin ;

-exe hyp_1f1_log_big_data : hyp_1f1_log_big_data.cpp : 
-[ check-target-builds ../../multiprecision/config//has_gmp : <library>gmp : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
+exe hyp_1f1_log_big_data : hyp_1f1_log_big_data.cpp :
+[ check-target-builds /boost/multiprecision/config//has_gmp : <library>gmp : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
 [ requires cxx11_decltype ] ;
 install hyp_1f1_log_big_data_install : hyp_1f1_log_big_data : bin ;

-exe ibeta_inv_data : ibeta_inv_data.cpp : [ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
+exe ibeta_inv_data : ibeta_inv_data.cpp : [ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
 install ibeta_inv_data_install : ibeta_inv_data : bin ;

 exe spherical_harmonic_data : spherical_harmonic_data.cpp :
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install spherical_harmonic_data_install : spherical_harmonic_data : bin ;

-exe digamma_data : digamma_data.cpp : [ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
+exe digamma_data : digamma_data.cpp /boost/test//included : [ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
 install digamma_data_install : digamma_data : bin ;

 exe expint_data : expint_data.cpp ;
 install expint_data_install : expint_data : bin ;

-exe hyp_1f1_reg_big_data : hyp_1f1_reg_big_data.cpp : 
-[ check-target-builds ../../multiprecision/config//has_gmp : <library>gmp : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
+exe hyp_1f1_reg_big_data : hyp_1f1_reg_big_data.cpp :
+[ check-target-builds /boost/multiprecision/config//has_gmp : <library>gmp : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfr : <library>mpfr : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfi : <library>gmp <library>mpfr <library>mpfi : <build>no ]
 [ requires cxx11_decltype ] ;
 install hyp_1f1_reg_big_data_install : hyp_1f1_reg_big_data : bin ;

-exe ibeta_invab_data : ibeta_invab_data.cpp : [ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
+exe ibeta_invab_data : ibeta_invab_data.cpp : [ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
 install ibeta_invab_data_install : ibeta_invab_data : bin ;

-exe tgamma_large_data : tgamma_large_data.cpp : 
-[ check-target-builds ../../multiprecision/config//has_gmp : <library>gmp : <build>no ]
-[ check-target-builds ../../multiprecision/config//has_mpfr : <library>mpfr : <build>no ] ;
+exe tgamma_large_data : tgamma_large_data.cpp /boost/test//included :
+[ check-target-builds /boost/multiprecision/config//has_gmp : <library>gmp : <build>no ]
+[ check-target-builds /boost/multiprecision/config//has_mpfr : <library>mpfr : <build>no ] ;
 install tgamma_large_data_install : tgamma_large_data : bin ;

-exe ellint_d_data : ellint_d_data.cpp ;
+exe ellint_d_data : ellint_d_data.cpp /boost/test//included ;
 install ellint_d_data_install : ellint_d_data : bin ;

 exe expint_i_data : expint_i_data.cpp ;
@@ -168,17 +170,17 @@ install expint_i_data_install : expint_i_data : bin ;
 exe hyp_1f2_data : hyp_1f2_data.cpp ;
 install hyp_1f2_data_install : hyp_1f2_data : bin ;

-exe igamma_data : igamma_data.cpp : [ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
+exe igamma_data : igamma_data.cpp : [ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
 install igamma_data_install : igamma_data : bin ;

-exe tgamma_ratio_data : tgamma_ratio_data.cpp : 
+exe tgamma_ratio_data : tgamma_ratio_data.cpp :
 [ check-target-builds ../config//is_ci_standalone_run : <build>no ] ;
 install tgamma_ratio_data_install : tgamma_ratio_data : bin ;

-exe ellint_d2_data : ellint_d2_data.cpp ;
+exe ellint_d2_data : ellint_d2_data.cpp /boost/test//included ;
 install ellint_d2_data_install : ellint_d2_data : bin ;

-exe gamma_P_inva_data : gamma_P_inva_data.cpp : [ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
+exe gamma_P_inva_data : gamma_P_inva_data.cpp : [ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ] ;
 install gamma_P_inva_data_install : gamma_P_inva_data : bin ;

 exe hyp_2f0_data : hyp_2f0_data.cpp ;
@@ -190,7 +192,7 @@ install inv_hyp_data_install : inv_hyp_data : bin ;
 exe trig_data : trig_data.cpp ;
 install trig_data_install : trig_data : bin ;

-exe ellint_e_data : ellint_e_data.cpp ;
+exe ellint_e_data : ellint_e_data.cpp /boost/test//included ;
 install ellint_e_data_install : ellint_e_data : bin ;

 exe hermite_data : hermite_data.cpp ;
@@ -199,10 +201,10 @@ install hermite_data_install : hermite_data : bin ;
 exe hyp_2f1_data : hyp_2f1_data.cpp ;
 install hyp_2f1_data_install : hyp_2f1_data : bin ;

-exe jacobi_theta_data : jacobi_theta_data.cpp ;
+exe jacobi_theta_data : jacobi_theta_data.cpp /boost/test//included ;
 install jacobi_theta_data_install : jacobi_theta_data : bin ;

-exe jacobi_zeta_data : jacobi_zeta_data.cpp ;
+exe jacobi_zeta_data : jacobi_zeta_data.cpp /boost/test//included ;
 install jacobi_zeta_data_install : jacobi_zeta_data : bin ;

 exe zeta_data : zeta_data.cpp :
@@ -215,8 +217,8 @@ install generate_test_values_install : generate_test_values : bin ;
 exe igamma_temme_large_coef : igamma_temme_large_coef.cpp ;
 install igamma_temme_large_coef_install : igamma_temme_large_coef : bin ;

-exe lanczos_generator : lanczos_generator.cpp ../../chrono/build//boost_chrono ../../system/build//boost_system : 
-[ check-target-builds ../../multiprecision/config//has_float128 : <library>quadmath : <build>no ]
+exe lanczos_generator : lanczos_generator.cpp /boost/chrono//boost_chrono /boost/system//boost_system :
+[ check-target-builds /boost/multiprecision/config//has_float128 : <library>quadmath : <build>no ]
 [ requires cxx11_nullptr ] ;
 install lanczos_generator_install : lanczos_generator : bin ;

@@ -228,11 +230,11 @@ install generate_rational_test_install : generate_rational_test : bin

 #for local source in [ glob *_data.cpp ] generate_test_values.cpp igamma_temme_large_coef.cpp lanczos_generator.cpp factorial_tables.cpp generate_rational_test.cpp
 #{
-#   exe $(source:B) : $(source) : [ check-target-builds ../../multiprecision/config//has_gmp : <define>HAS_GMP <library>gmp : <build>no ] [ check-target-builds ../../multiprecision/config//has_mpfr : <define>HAS_MPFR <library>mpfr : <build>no ] [ check-target-builds ../../multiprecision/config//has_mpfi : <define>HAS_MPFI <library>gmp <library>mpfr <library>mpfi ] ;
+#   exe $(source:B) : $(source) : [ check-target-builds /boost/multiprecision/config//has_gmp : <define>HAS_GMP <library>gmp : <build>no ] [ check-target-builds /boost/multiprecision/config//has_mpfr : <define>HAS_MPFR <library>mpfr : <build>no ] [ check-target-builds /boost/multiprecision/config//has_mpfi : <define>HAS_MPFI <library>gmp <library>mpfr <library>mpfi ] ;
 #   install $(source:B)_bin : $(source:B) : bin ;
 #}

 exe generate_rational_code : generate_rational_code.cpp ;
-exe process_perf_results : process_perf_results.cpp ;
+exe process_perf_results : process_perf_results.cpp /boost/format//boost_format ;

 install bin : generate_rational_code process_perf_results ;
diff --git a/tools/generate_rational_code.cpp b/tools/generate_rational_code.cpp
index 2da7e000ea..20ffde8503 100644
--- a/tools/generate_rational_code.cpp
+++ b/tools/generate_rational_code.cpp
@@ -40,13 +40,13 @@ void print_polynomials(int max_order)
       "#define BOOST_MATH_TOOLS_POLY_EVAL_" << i << "_HPP\n\n"
       "namespace boost{ namespace math{ namespace tools{ namespace detail{\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*)\n"
       "{\n"
       "   return static_cast<V>(0);\n"
       "}\n"
       "\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*)\n"
       "{\n"
       "   return static_cast<V>(a[0]);\n"
       "}\n\n";
@@ -55,7 +55,7 @@ void print_polynomials(int max_order)
    {
      ofs <<
        "template <class T, class V>\n"
-       "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, " << order << ">*)\n"
+       "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, " << order << ">*)\n"
        "{\n"
        "   return static_cast<V>((";
@@ -90,28 +90,28 @@ void print_polynomials(int max_order)
       "#define BOOST_MATH_TOOLS_POLY_EVAL_" << i << "_HPP\n\n"
       "namespace boost{ namespace math{ namespace tools{ namespace detail{\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*)\n"
       "{\n"
       "   return static_cast<V>(0);\n"
       "}\n"
       "\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*)\n"
       "{\n"
       "   return static_cast<V>(a[0]);\n"
       "}\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*)\n"
       "{\n"
       "   return static_cast<V>(a[1] * x + a[0]);\n"
       "}\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*)\n"
       "{\n"
       "   return static_cast<V>((a[2] * x + a[1]) * x + a[0]);\n"
       "}\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*)\n"
       "{\n"
       "   return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);\n"
       "}\n\n";
@@ -120,7 +120,7 @@ void print_polynomials(int max_order)
    {
      ofs <<
        "template <class T, class V>\n"
-       "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, " << order << ">*)\n"
+       "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, " << order << ">*)\n"
        "{\n"
        "   V x2 = x * x;\n"
        "   return static_cast<V>((";
@@ -186,28 +186,28 @@ void print_polynomials(int max_order)
       "#define BOOST_MATH_TOOLS_POLY_EVAL_" << i << "_HPP\n\n"
       "namespace boost{ namespace math{ namespace tools{ namespace detail{\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T*, const V&, const std::integral_constant<int, 0>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T*, const V&, const boost::math::integral_constant<int, 0>*)\n"
       "{\n"
       "   return static_cast<V>(0);\n"
       "}\n"
       "\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V&, const std::integral_constant<int, 1>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V&, const boost::math::integral_constant<int, 1>*)\n"
       "{\n"
       "   return static_cast<V>(a[0]);\n"
       "}\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 2>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 2>*)\n"
       "{\n"
       "   return static_cast<V>(a[1] * x + a[0]);\n"
       "}\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 3>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 3>*)\n"
       "{\n"
       "   return static_cast<V>((a[2] * x + a[1]) * x + a[0]);\n"
       "}\n\n"
       "template <class T, class V>\n"
-      "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, 4>*)\n"
+      "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, 4>*)\n"
       "{\n"
       "   return static_cast<V>(((a[3] * x + a[2]) * x + a[1]) * x + a[0]);\n"
       "}\n\n";
@@ -216,7 +216,7 @@ void print_polynomials(int max_order)
    {
      ofs <<
        "template <class T, class V>\n"
-       "inline V evaluate_polynomial_c_imp(const T* a, const V& x, const std::integral_constant<int, " << order << ">*)\n"
+       "BOOST_MATH_GPU_ENABLED inline V evaluate_polynomial_c_imp(const T* a, const V& x, const boost::math::integral_constant<int, " << order << ">*)\n"
"{\n" " V x2 = x * x;\n" " V t[2];\n"; @@ -281,13 +281,13 @@ void print_rationals(int max_order) "#define BOOST_MATH_TOOLS_POLY_RAT_" << i << "_HPP\n\n" "namespace boost{ namespace math{ namespace tools{ namespace detail{\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*)\n" "{\n" " return static_cast(0);\n" "}\n" "\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*)\n" "{\n" " return static_cast(a[0]) / static_cast(b[0]);\n" "}\n\n"; @@ -296,7 +296,7 @@ void print_rationals(int max_order) { ofs << "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " if((-1 <= x) && (x <= 1))\n" " return static_cast(("; @@ -361,28 +361,28 @@ void print_rationals(int max_order) "#define BOOST_MATH_TOOLS_RAT_EVAL_" << i << "_HPP\n\n" "namespace boost{ namespace math{ namespace tools{ namespace detail{\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*)\n" "{\n" " return static_cast(0);\n" "}\n" "\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*)\n" "{\n" " return static_cast(a[0]) / static_cast(b[0]);\n" "}\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0]));\n" "}\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));\n" "}\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));\n" "}\n\n"; @@ -391,7 +391,7 @@ void print_rationals(int max_order) { ofs << "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " if((-1 <= x) && (x <= 1))\n {\n" " V x2 = x * x;\n" @@ -577,28 +577,28 @@ void print_rationals(int max_order) "#define 
BOOST_MATH_TOOLS_RAT_EVAL_" << i << "_HPP\n\n" "namespace boost{ namespace math{ namespace tools{ namespace detail{\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T*, const U*, const V&, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T*, const U*, const V&, const boost::math::integral_constant*)\n" "{\n" " return static_cast(0);\n" "}\n" "\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V&, const boost::math::integral_constant*)\n" "{\n" " return static_cast(a[0]) / static_cast(b[0]);\n" "}\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " return static_cast((a[1] * x + a[0]) / (b[1] * x + b[0]));\n" "}\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " return static_cast(((a[2] * x + a[1]) * x + a[0]) / ((b[2] * x + b[1]) * x + b[0]));\n" "}\n\n" "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " return static_cast((((a[3] * x + a[2]) * x + a[1]) * x + a[0]) / (((b[3] * x + b[2]) * x + b[1]) * x + b[0]));\n" "}\n\n"; @@ -607,7 +607,7 @@ void print_rationals(int max_order) { ofs << "template \n" - "inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const std::integral_constant*)\n" + "BOOST_MATH_GPU_ENABLED inline V evaluate_rational_c_imp(const T* a, const U* b, const V& x, const boost::math::integral_constant*)\n" "{\n" " if((-1 <= x) && (x <= 1))\n {\n" " V x2 = x * x;\n" diff --git a/tools/ibeta_derivative_data.cpp b/tools/ibeta_derivative_data.cpp index f00fe46785..27d647410d 100644 --- a/tools/ibeta_derivative_data.cpp +++ b/tools/ibeta_derivative_data.cpp @@ -17,11 +17,11 @@ using namespace boost::math::tools; using namespace boost::math; using namespace std; -#include +#include #define T double #define SC_(x) static_cast(x) -#include +#include int main(int, char* []) { diff --git a/tools/nc_t_data.cpp b/tools/nc_t_data.cpp index 90b9e61558..ff3b994739 100644 --- a/tools/nc_t_data.cpp +++ b/tools/nc_t_data.cpp @@ -24,7 +24,7 @@ #include #include -#include +#include using namespace boost::math::tools; using namespace boost::math; @@ -91,7 +91,7 @@ int main(int, char* []) boost::math::quadrature::exp_sinh integrator(10); using T = float; -#include +#include for (unsigned i = 0; i < nct.size(); ++i) @@ -127,7 +127,7 @@ int main(int, char* []) std::cout << cdf << "), SC_(" << ccdf << ") }}," << std::endl; } -#include +#include for (unsigned i = 0; i < nct_small_delta.size(); ++i) { big_t error1, error2;